library(data.table)
library(dplyr)
library(DT)
library(ggplot2)
library(gtools)
library(vegan)
library(iNEXT)
library(fossil)
library(ggrepel)

1) 18S amplicons

1.1) Data overview

Let’s read the dataset and remove the samples containing less than 1154 reads:

setwd("~/Documents/TFM2/TFM2_Taraspina_miTags_analysis/")

#read data 
tb18_tax <- read.table(file="~/Documents/TFM2/TFM2_Taraspina_miTags_analysis/input/otu_table99_SUR_miTags_18S_classif.txt", head=TRUE, fill=TRUE)

#table dimensions and format before setting column names
dim(tb18_tax) # 8642   58
tb18_tax[1:5,1:5]

row.names(tb18_tax)<-tb18_tax[,1]
tb18_tax<-tb18_tax[,-1]
tb18_tax[is.na(tb18_tax)]<-"NA"

dim(tb18_tax) #8642   57
tb18_tax[1:5,1:5]

#table with taxonomic classification alone:
tb18_class <- tb18_tax[,56:57]
dim(tb18_class) #8642    2
tb18_class[1:5,1:2]

#table with occurence data alone
tb18_tax_occur <- tb18_tax[,1:55]
dim(tb18_tax_occur) #8642   55
tb18_tax_occur[1:5,1:5]

amplicons_per_sample_tb18<-colSums(tb18_tax_occur)
summary(amplicons_per_sample_tb18) #median of amplicons per sample = ~3086)
#we'll remove all samples with less than 1154 reads.
amplicons_per_sample_tb18[which(colSums(tb18_tax_occur)<1154)]

#remove samples with less than 1154 reads
tb18_tax_occur_min1154 <- tb18_tax_occur[,colSums(tb18_tax_occur) >= 1154]
dim(tb18_tax_occur_min1154) #8642   45

#remove samples with omitted in 16S dataset (so that we can compare the relative abundance of 16S and 18S OTUs considering the same samples)
tb18_tax_occur_min1154<-subset(tb18_tax_occur_min1154, select=-c(TARA_70_SUR_0d2_3,TARA_67_SUR_0d2_3))


dim(tb18_tax_occur_min1154) #8642   43

#let's check if we loose any OTU when excluding the mentioned stations.
tb18_tax_occur_min1154_no_cero<-tb18_tax_occur_min1154[-(which(rowSums(tb18_tax_occur_min1154)==0)),]
dim(tb18_tax_occur_min1154_no_cero)

Table dimensions and content outline:

## [1] 8642   43
##       TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1                  0                  0                  0
## OTU_2                  0                  0                  0
## OTU_3                  0                  0                  0
## OTU_4                  0                  0                  0
## OTU_5                  0                  0                  0
##       TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1                  0                  0
## OTU_2                  0                  0
## OTU_3                  1                  0
## OTU_4                  0                  0
## OTU_5                  0                  0

Minimum number of reads per station:

min(colSums(tb18_tax_occur_min1154)) 
## [1] 1154

Maximum number of reads per station:

max(colSums(tb18_tax_occur_min1154)) 
## [1] 23781

Identification of stations with higher number of reads:

amplicons_per_sample<-colSums(tb18_tax_occur_min1154)
amplicons_per_sample[which(colSums(tb18_tax_occur_min1154)>20000)]
## TARA_84_SUR_0d2_3 
##             23781

Overall reads per sample:

1.2) Normalization

Let’s normalize the original dataset by randomly subsampling 1154 reads in each station:

tb18_tax_occur_min1154_t<-t(tb18_tax_occur_min1154)
tb18_tax_occur_ss1154<-rrarefy(tb18_tax_occur_min1154_t, 1154)

The normalized table shows the following dimensions and format:

## [1]   43 8642
##                    OTU_1 OTU_2 OTU_3 OTU_4 OTU_5
## TARA_102_SUR_0d2_3     0     0     0     0     0
## TARA_109_SUR_0d2_3     0     0     0     0     0
## TARA_110_SUR_0d2_3     0     0     0     0     0
## TARA_111_SUR_0d2_3     0     0     1     0     0
## TARA_112_SUR_0d2_3     0     0     0     0     0

Its content fits with the expected normalization values (1154 reads per station):

rowSums(tb18_tax_occur_ss1154)
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3 
##               1154               1154               1154 
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3 TARA_122_SUR_0d2_3 
##               1154               1154               1154 
## TARA_123_SUR_0d2_3 TARA_124_SUR_0d2_3 TARA_125_SUR_0d2_3 
##               1154               1154               1154 
## TARA_128_SUR_0d2_3 TARA_132_SUR_0d2_3 TARA_133_SUR_0d2_3 
##               1154               1154               1154 
## TARA_137_SUR_0d2_3 TARA_138_SUR_0d2_3 TARA_140_SUR_0d2_3 
##               1154               1154               1154 
## TARA_142_SUR_0d2_3 TARA_145_SUR_0d2_3 TARA_146_SUR_0d2_3 
##               1154               1154               1154 
## TARA_148_SUR_0d2_3 TARA_149_SUR_0d2_3 TARA_150_SUR_0d2_3 
##               1154               1154               1154 
## TARA_151_SUR_0d2_3 TARA_152_SUR_0d2_3  TARA_56_SUR_0d2_3 
##               1154               1154               1154 
##  TARA_57_SUR_0d2_3  TARA_62_SUR_0d2_3  TARA_64_SUR_0d2_3 
##               1154               1154               1154 
##  TARA_65_SUR_0d2_3  TARA_66_SUR_0d2_3  TARA_68_SUR_0d2_3 
##               1154               1154               1154 
##  TARA_72_SUR_0d2_3  TARA_76_SUR_0d2_3  TARA_78_SUR_0d2_3 
##               1154               1154               1154 
##  TARA_82_SUR_0d2_3  TARA_84_SUR_0d2_3  TARA_85_SUR_0d2_3 
##               1154               1154               1154 
##  TARA_96_SUR_0d2_3  TARA_98_SUR_0d2_3  TARA_99_SUR_0d2_3 
##               1154               1154               1154 
##             MP0311             MP1517             MP1672 
##               1154               1154               1154 
##             MP2821 
##               1154

Let’s check out how many OTUs don’t appear in the new table:

length(which(colSums(tb18_tax_occur_ss1154)==0))
## [1] 2942

There are 2942 OTUs that don’t show any occurrence in the normalized data. Let’s remove them from the table and take a look at its final dimensions:

tb18_tax_occur_ss1154_no_cero<-tb18_tax_occur_ss1154[,-(which(colSums(tb18_tax_occur_ss1154)==0))]
tb18_tax_occur_ss1154_no_cero<-tb18_tax_occur_ss1154_no_cero[mixedorder(row.names(tb18_tax_occur_ss1154_no_cero)),]
dim(tb18_tax_occur_ss1154_no_cero)
## [1]   43 5700

Datasets summary:

dim(tb18_tax) 
## [1] 8642   57
dim(tb18_tax_occur) 
## [1] 8642   55
dim(tb18_tax_occur_ss1154_no_cero) 
## [1]   43 5700

1.3) General community analysis

1.3.1) Richness and evenness (Shannon index)

Most of the samples take Shannon Index values around 6:

1.3.2) Richness: OTU number

Lowest number of OTUs per sample:

## [1] 348

Maximum number of OTUs per sample:

## [1] 791

In most of the samples, we can identify between 600 and 700 OTUs:

plot(sort(OTUs_per_sample_18S_tax_occur_ss1154), pch=19)

boxplot(OTUs_per_sample_18S_tax_occur_ss1154, pch=19)

1.3.3) Index of evenness

1.3.3.1) Pielou’s index

pielou_evenness_18S_tax_occur_ss1154<-tb18_tax_occur_ss1154_div/log(OTUs_per_sample_18S_tax_occur_ss1154)

The Pielou index (constrained between 0 and 1) takes values closer to 1 as the variation of species proportion in a sample increases. Most of the samples get values around 0.95, meaning that the numerical composition of different OTUs in a sample is highly variable - there’s no constant presence of dominant species.

The less variation in communities between the species (and the presence of a dominant specie), the lower J’ is.

plot(sort(pielou_evenness_18S_tax_occur_ss1154), pch=19)

boxplot(pielou_evenness_18S_tax_occur_ss1154, pch=19)

The OTU_38, with 874 reads, is the most abundant in the overall dataset:

head(sort(colSums(tb18_tax_occur_ss1154_no_cero), decreasing=T), n=10L)
##   OTU_38 OTU_3677 OTU_5415  OTU_751 OTU_3374 OTU_2494 OTU_2631 OTU_3569 
##      874      695      571      524      429      361      310      282 
## OTU_3177 OTU_1032 
##      270      267

Most of the OTUs show very few occurrences; suggesting that we will probably be able to identify a significant ammount of rare otus:

plot(log(sort(colSums(tb18_tax_occur_ss1154_no_cero), decreasing=T)), pch=19)

1.3.4) Abundance Models

1.3.4.1) Rank-Abundance or Dominance/Diversity Model (“radfit”)

The OTUs abundance distribution fits relativelly close to log-normal model.

1.3.4.2) Preston’s Lognormal Model

According to Preston’s lognormal model fit into species frequencies groups, we’re missing 1588.816 species:

tb18_tax_occur_ss1154_prestonfit<-prestonfit(colSums(tb18_tax_occur_min1154_t))
plot(tb18_tax_occur_ss1154_prestonfit, main="Pooled species")

veiledspec(tb18_tax_occur_ss1154_prestonfit)
## Extrapolated     Observed       Veiled 
##     9937.816     8349.000     1588.816

When computing Prestons’ lognormal model fit without pooling data into groups, we seem to miss 1385.371 species:

## Extrapolated     Observed       Veiled 
##     9734.371     8349.000     1385.371

1.3.5) Rarefaction curves of rarefied and non-rarefied datasets

rarec_input<-t(as.matrix(colSums(tb18_tax_occur_ss1154_no_cero)))

#tb18_rarecurve_step1000_10000<-rarecurve(rarec_input, step = 1000, 10000, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="18S amplicons diversity step=1000 & ss=10000\n(40,816 OTUs; 5,247,375 reads)\n")
#tb18_rarecurve_step1000_10000

tb18_rarecurve_step100_5700<-rarecurve(rarec_input, step = 100, 5700, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="18S amplicons diversity step=100 & ss=5700\n(5,719 OTUs; 49,622 reads)\n")

#without rarefying
rarec_allOTUs_input<-t(as.matrix(colSums(t(tb18_tax_occur))))
tb18_rarecurve_allOTUs_step100_8642<-rarecurve(rarec_allOTUs_input, step = 100, 8642, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="18S amplicons diversity non-rarefied step=100 & ss=8642\n(8,642 OTUs; 244,184 reads)\n")

1.3.6) Beta diversity

1.3.6.1) Dissimilarity matrix using Bray-Curtis index:

The Bray-Curtis dissimilarity, constrained between 0 (minimum distance) and 1 (highest dissimilarity) allows us to quantify the differences between samples according to the composition and relative abundance of their OTUs. In our dataset, most of the samples pairs take dissimilarity values around 0.8, meaning that their composition is substantially different.

1.3.6.2) Hierarchical clustering

The stations seem to form clusters according to geographic localization, but there are no evident clusters separated from the general groups, apart from the one containing TARA_84, TARA_82 and TARA_85.

(To be done: assign Longhurst provinces information to each station and check if any of the central clusters is meaningful regarding to the samples’ geographical location)

1.3.6.3) Non-metric multidimensional scaling

We can identify a prominent group in the central part of the NMDS plot and a few outliers (TARA 82, 84 and 85) in the central-right edge of the plot. The stress parameter takes a value below 0.2, suggesting that the plot is acceptable.

## 
## Call:
## monoMDS(dist = tb18_tax_occur_ss1154_no_cero.bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 43 points, dissimilarity 'bray', call 'vegdist(x = tb18_tax_occur_ss1154_no_cero, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.1463937 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 70 iterations: Stress nearly unchanged (ratio > sratmax)

When implementing a most robut function for computing NMDS plots, the result is quiet the same:

## Run 0 stress 0.1465317 
## Run 1 stress 0.1710814 
## Run 2 stress 0.1529505 
## Run 3 stress 0.1529514 
## Run 4 stress 0.1463796 
## ... New best solution
## ... Procrustes: rmse 0.0160749  max resid 0.09410719 
## Run 5 stress 0.1450501 
## ... New best solution
## ... Procrustes: rmse 0.03346756  max resid 0.2057251 
## Run 6 stress 0.1544138 
## Run 7 stress 0.1529513 
## Run 8 stress 0.1464534 
## Run 9 stress 0.1645093 
## Run 10 stress 0.1464532 
## Run 11 stress 0.1529505 
## Run 12 stress 0.145052 
## ... Procrustes: rmse 0.0005393131  max resid 0.002027162 
## ... Similar to previous best
## Run 13 stress 0.1450487 
## ... New best solution
## ... Procrustes: rmse 0.002278156  max resid 0.01146875 
## Run 14 stress 0.154718 
## Run 15 stress 0.154417 
## Run 16 stress 0.1450194 
## ... New best solution
## ... Procrustes: rmse 0.003631269  max resid 0.01466868 
## Run 17 stress 0.1546418 
## Run 18 stress 0.1522817 
## Run 19 stress 0.1522815 
## Run 20 stress 0.1544133 
## *** No convergence -- monoMDS stopping criteria:
##     20: stress ratio > sratmax
## Warning in ordiplot(x, choices = choices, type = type, display = display, :
## Species scores not available

1.4) Geographical analysis

## Warning in if (class(lats) == "SpatialPoints") lats <- coordinates(lats):
## the condition has length > 1 and only the first element will be used

Working datasets:

  1. Community matrix: tb18_tax_occur_ss1154_no_cero
dim(tb18_tax_occur_ss1154_no_cero)
## [1]   43 5728
tb18_tax_occur_ss1154_no_cero[1:5, 1:5]
##                    OTU_1 OTU_2 OTU_3 OTU_5 OTU_6
## MP0311                 0     0     0     0     0
## MP1517                 0     0     0     0     1
## MP1672                 0     0     0     0     0
## MP2821                 0     3     0     0     0
## TARA_102_SUR_0d2_3     0     0     0     0     0
  1. Community Bray-Curtis: tb18_tax_occur_ss1154_no_cero.bray
tb18_tax_occur_ss1154_no_cero.bray<-as.matrix(tb18_tax_occur_ss1154_no_cero.bray)
## [1] 43 43
  1. Stations distances in km: geo_distances_MP_18S
dim(geo_distances_MP_18S)
## [1] 43 43

Communities quickly change their composition across geographical distances:

plot(geo_distances_MP_18S, tb18_tax_occur_ss1154_no_cero.bray, pch=19, cex=0.4, xlab="Geopgraphical distances", ylab="Bray-Curtis dissimilarities")

1.4.1) Mantel correlograms

Mantel statistic is -significantlly- so low, meaning that the correlation between samples dissimilarity and geographical distances is weak.

mantel(geo_distances_MP_18S, tb18_tax_occur_ss1154_no_cero.bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = geo_distances_MP_18S, ydis = tb18_tax_occur_ss1154_no_cero.bray) 
## 
## Mantel statistic r: 0.1186 
##       Significance: 0.018 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0646 0.0894 0.1075 0.1334 
## Permutation: free
## Number of permutations: 999

Maximum distance between samples:

## [1] 19543.94

Minimum distance between samples:

## [1] 0

Correlograms:

MP_18s_ss1154_mantel_correl_by_1000km<-mantel.correlog(tb18_tax_occur_ss1154_no_cero.bray, D.geo=geo_distances_MP_18S, break.pts=seq(0,20000, by=1000))
plot(MP_18s_ss1154_mantel_correl_by_1000km)

MP_18s_ss1154_mantel_correl_by_100km<-mantel.correlog(tb18_tax_occur_ss1154_no_cero.bray, D.geo=geo_distances_MP_18S, break.pts=seq(0,20000, by=100))
plot(MP_18s_ss1154_mantel_correl_by_100km)

1.5) Abundance vs. occurence

In the following plot, we can appreciate the OTUs distribution according to their percentage of occurence and relative abundance. The red line keeps up OTUs that occur in more than 80% of the samples, the green line limits regionally rare OTUs (< 0.001%), and the blue one restricts regionally abundant OTUs (> 0.1%).

Regionally abundant OTUs (relative abundance over 0.1%):

tb18_class_prov<-cbind(otu_names=row.names(tb18_class),tb18_class)
##     otu_names mean_rabund perc_occur                  classif
## 1    OTU_1001 0.001188989  60.465116                     <NA>
## 2    OTU_1003 0.001027770  62.790698                     <NA>
## 3    OTU_1012 0.001007618  60.465116                     <NA>
## 4    OTU_1018 0.001874169  13.953488         Prymnesiophyceae
## 5    OTU_1032 0.005300069  72.093023         Prymnesiophyceae
## 6    OTU_1047 0.001813712  67.441860                     <NA>
## 7    OTU_1066 0.001330055  58.139535         Prymnesiophyceae
## 8    OTU_1087 0.001289751  55.813953                     <NA>
## 9    OTU_1261 0.001128532  58.139535                     <NA>
## 10   OTU_1276 0.001007618  48.837209                     <NA>
## 11   OTU_1343 0.001712950  69.767442              Dinophyceae
## 12   OTU_1383 0.003627423  88.372093                     <NA>
## 13   OTU_1395 0.001330055  25.581395                     <NA>
## 14   OTU_1446 0.001289751  69.767442                     <NA>
## 15   OTU_1456 0.002901939  72.093023              Dinophyceae
## 16   OTU_1496 0.001551731  69.767442                     <NA>
## 17   OTU_1551 0.001350208  65.116279                     <NA>
## 18   OTU_1597 0.003385595  69.767442         Prymnesiophyceae
## 19   OTU_1628 0.001229293  58.139535              Dinophyceae
## 20   OTU_1635 0.001450969  69.767442                     <NA>
## 21   OTU_1641 0.001612188  74.418605              Dinophyceae
## 22   OTU_1694 0.001289751  55.813953                     <NA>
## 23   OTU_1746 0.001793559  53.488372         Prymnesiophyceae
## 24   OTU_1825 0.001108379  55.813953                     <NA>
## 25   OTU_1833 0.002720567  72.093023                     <NA>
## 26   OTU_1901 0.001047922  48.837209                     <NA>
## 27   OTU_1911 0.001068075  48.837209                     <NA>
## 28   OTU_1921 0.003687880  76.744186         Prymnesiophyceae
## 29   OTU_1929 0.001330055  48.837209          Mamiellophyceae
## 30   OTU_1975 0.001027770  58.139535                     <NA>
## 31   OTU_1997 0.001471122  74.418605         Prymnesiophyceae
## 32   OTU_2031 0.002377978  67.441860                     <NA>
## 33   OTU_2043 0.001632340  60.465116                     <NA>
## 34   OTU_2078 0.001209141  67.441860                     <NA>
## 35   OTU_2086 0.001068075  65.116279              Dinophyceae
## 36   OTU_2089 0.001108379  46.511628                     <NA>
## 37   OTU_2120 0.001390512  58.139535                     <NA>
## 38   OTU_2124 0.001229293  72.093023         Dictyochophyceae
## 39   OTU_2158 0.001168836  67.441860         Prymnesiophyceae
## 40    OTU_216 0.001531579  67.441860              Dinophyceae
## 41   OTU_2189 0.001330055  58.139535                     <NA>
## 42   OTU_2195 0.001733102  72.093023              Dinophyceae
## 43   OTU_2200 0.003728185  86.046512                     <NA>
## 44   OTU_2207 0.001450969  72.093023              Dinophyceae
## 45   OTU_2219 0.001027770  67.441860         Prymnesiophyceae
## 46   OTU_2269 0.001753255  86.046512                     <NA>
## 47   OTU_2305 0.001612188  74.418605                     <NA>
## 48   OTU_2456 0.001128532  58.139535                     <NA>
## 49   OTU_2466 0.001007618  46.511628                     <NA>
## 50   OTU_2494 0.006932409  69.767442         Prymnesiophyceae
## 51   OTU_2518 0.001551731  53.488372                     <NA>
## 52   OTU_2553 0.001974930  83.720930                     <NA>
## 53    OTU_256 0.001773407  76.744186                     <NA>
## 54   OTU_2594 0.001309903  69.767442                     <NA>
## 55   OTU_2631 0.006106163  74.418605         Prymnesiophyceae
## 56   OTU_2648 0.001712950  83.720930                     <NA>
## 57   OTU_2654 0.002115997  65.116279            Pelagophyceae
## 58   OTU_2686 0.001632340  72.093023                     <NA>
## 59   OTU_2730 0.001289751  53.488372                     <NA>
## 60   OTU_2853 0.001350208  69.767442         Prymnesiophyceae
## 61   OTU_2868 0.001712950  48.837209                     <NA>
## 62   OTU_2873 0.001148684  72.093023                     <NA>
## 63   OTU_2887 0.001350208  69.767442                     <NA>
## 64   OTU_2906 0.001027770  48.837209                     <NA>
## 65   OTU_2910 0.004070775  72.093023                     <NA>
## 66    OTU_295 0.001027770  58.139535                     <NA>
## 67   OTU_2965 0.001733102  67.441860                     <NA>
## 68   OTU_2966 0.001128532  58.139535                     <NA>
## 69   OTU_2986 0.001249446  60.465116                     <NA>
## 70   OTU_2996 0.002055540  69.767442                     <NA>
## 71   OTU_3018 0.001612188  72.093023                     <NA>
## 72   OTU_3038 0.002539196  76.744186                     <NA>
## 73   OTU_3056 0.001934626  72.093023                     <NA>
## 74   OTU_3124 0.001450969  58.139535                     <NA>
## 75   OTU_3177 0.005682963  48.837209          Mamiellophyceae
## 76   OTU_3196 0.001168836  58.139535         Prymnesiophyceae
## 77   OTU_3227 0.003466205  34.883721 Prasinophyceae_clade-VII
## 78   OTU_3271 0.002357825  32.558140 Prasinophyceae_clade-VII
## 79   OTU_3282 0.004211842  37.209302          Mamiellophyceae
## 80   OTU_3374 0.008826730  93.023256            Pelagophyceae
## 81   OTU_3430 0.001390512  34.883721          Mamiellophyceae
## 82   OTU_3454 0.003607271  72.093023                     <NA>
## 83   OTU_3560 0.001974930  62.790698            Pelagophyceae
## 84   OTU_3569 0.005159002  53.488372          Mamiellophyceae
## 85   OTU_3602 0.001410665  65.116279                     <NA>
## 86    OTU_363 0.001007618  44.186047                     <NA>
## 87   OTU_3677 0.013884970  81.395349         Prymnesiophyceae
## 88   OTU_3729 0.003909556  79.069767            Pelagophyceae
## 89    OTU_373 0.001007618  51.162791                     <NA>
## 90     OTU_38 0.017492241  88.372093            Pelagophyceae
## 91    OTU_380 0.002156302  83.720930         Prymnesiophyceae
## 92   OTU_3813 0.001007618  39.534884                     <NA>
## 93   OTU_3871 0.002156302  69.767442                     <NA>
## 94   OTU_3926 0.002015235  69.767442            Pelagophyceae
## 95   OTU_4089 0.001430817  37.209302          Mamiellophyceae
## 96   OTU_4090 0.001551731  30.232558          Mamiellophyceae
## 97   OTU_4103 0.002418282  37.209302          Mamiellophyceae
## 98   OTU_4110 0.001027770  41.860465          Mamiellophyceae
## 99   OTU_4123 0.002519044  34.883721 Prasinophyceae_clade-VII
## 100  OTU_4125 0.002599653  79.069767            Pelagophyceae
## 101  OTU_4134 0.001733102  48.837209            Pelagophyceae
## 102  OTU_4144 0.003244529  44.186047          Mamiellophyceae
## 103  OTU_4176 0.003385595  34.883721          Mamiellophyceae
## 104  OTU_4190 0.001249446  30.232558          Mamiellophyceae
## 105  OTU_4203 0.001350208  53.488372            Pelagophyceae
## 106  OTU_4226 0.001672645  37.209302          Mamiellophyceae
## 107  OTU_4227 0.003204224  32.558140 Prasinophyceae_clade-VII
## 108  OTU_4260 0.001793559  41.860465          Mamiellophyceae
## 109   OTU_437 0.001450969  76.744186         Prymnesiophyceae
## 110  OTU_4518 0.002377978  44.186047          Mamiellophyceae
## 111  OTU_4533 0.001551731  39.534884          Mamiellophyceae
## 112   OTU_458 0.001108379  60.465116         Prymnesiophyceae
## 113  OTU_5120 0.004070775  39.534884          Mamiellophyceae
## 114  OTU_5415 0.010761356  25.581395                     <NA>
## 115  OTU_5484 0.001188989  34.883721          Mamiellophyceae
## 116  OTU_5750 0.001108379  25.581395          Mamiellophyceae
## 117   OTU_599 0.001068075  48.837209                     <NA>
## 118  OTU_6181 0.001229293  23.255814                     <NA>
## 119   OTU_628 0.001229293  58.139535              Dinophyceae
## 120  OTU_6608 0.001108379  18.604651                     <NA>
## 121   OTU_717 0.001833864  76.744186              Dinophyceae
## 122   OTU_751 0.010983032  37.209302 Prasinophyceae_clade-VII
## 123   OTU_758 0.001874169  65.116279                     <NA>
## 124   OTU_793 0.001430817  60.465116                     <NA>
## 125   OTU_825 0.001148684  44.186047                     <NA>
## 126   OTU_884 0.001390512  76.744186                     <NA>
## 127   OTU_893 0.002015235  55.813953                     <NA>
## 128   OTU_896 0.002297368  74.418605                     <NA>
## 129   OTU_914 0.001128532  58.139535         Prymnesiophyceae
## 130   OTU_922 0.001229293  58.139535                     <NA>
## 131   OTU_933 0.004796260  90.697674         Prymnesiophyceae
## 132   OTU_938 0.001188989  67.441860                     <NA>
## 133   OTU_949 0.002055540  65.116279         Dictyochophyceae
## 134   OTU_950 0.002317520   4.651163         Prymnesiophyceae
## 135   OTU_973 0.001934626  67.441860                     <NA>
## 136   OTU_974 0.002680263  67.441860         Prymnesiophyceae
## 137   OTU_983 0.001269598  55.813953         Dictyochophyceae
## 138   OTU_990 0.001188989  67.441860                     <NA>
## [1] 138   4

Proportion of regionally abundant OTUs (%):

## [1] 2.409218

Cosmopolitan OTUs (relative abundance over 0.1% and occurence in more than 80% of samples):

otu_tb18_ss1154_cosmop_sorted_prov<-cbind(otu_names=row.names(otu_tb18_ss1154_cosmop_sorted),otu_tb18_ss1154_cosmop_sorted)
##    otu_names mean_rabund perc_occur          classif
## 1   OTU_1383 0.003627423   88.37209             <NA>
## 2   OTU_2200 0.003728185   86.04651             <NA>
## 3   OTU_2269 0.001753255   86.04651             <NA>
## 4   OTU_2553 0.001974930   83.72093             <NA>
## 5   OTU_2648 0.001712950   83.72093             <NA>
## 6   OTU_3374 0.008826730   93.02326    Pelagophyceae
## 7   OTU_3677 0.013884970   81.39535 Prymnesiophyceae
## 8     OTU_38 0.017492241   88.37209    Pelagophyceae
## 9    OTU_380 0.002156302   83.72093 Prymnesiophyceae
## 10   OTU_933 0.004796260   90.69767 Prymnesiophyceae
## [1] 10  4

Number and proportion (%) of cosmopolitan OTUs:

## [1] 10
## [1] 0.174581

Number and proportion (%) of rare OTUs:

## [1] 0
## [1] 0

1.6) Taxonomic composition analysis

1.6.1) Normalized data

1.6.1.1) Absolute values

Let’s add the taxonomic classification by merging “tb18_tax_occur_ss1154_no_cero” with “tb18_tax”:

dim(tb18_ss1154_tax_sorted)
## [1] 5728   46
tb18_ss1154_tax_sorted[1:5,1:5]
##       MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_1      0      0      0      0                  0
## OTU_2      0      0      0      3                  0
## OTU_3      0      0      0      0                  0
## OTU_5      0      0      0      0                  0
## OTU_6      0      1      0      0                  0

Selection of phototrophs:

tb18_phototrophs <- tb18_ss1154_tax_sorted[which(tb18_ss1154_tax_sorted$classif != "NA"),]

dim(tb18_phototrophs)
## [1] 1905   46
#create a table per group and count in how many samples they occur. 
Dinophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Dinophyceae"),]
Dinophyceae_tb[1:5,1:5]
##        MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_14      0      0      0      0                  0
## OTU_19      0      0      0      0                  0
## OTU_29      0      0      0      0                  0
## OTU_72      0      0      0      0                  0
## OTU_74      0      0      0      0                  0
Dinophyceae_tb_occur <- Dinophyceae_tb[,1:43]
Dinophyceae_tb_occur[1:5,1:5]
##        MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_14      0      0      0      0                  0
## OTU_19      0      0      0      0                  0
## OTU_29      0      0      0      0                  0
## OTU_72      0      0      0      0                  0
## OTU_74      0      0      0      0                  0
dim(Dinophyceae_tb_occur)
## [1] 1044   43
length(Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0])
## [1] 43
#Dinophyceae_tb_samples <- Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0]
#length(Dinophyceae_tb_samples[which(colSums(Dinophyceae_tb_occur) != 0)])

Prasinophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "other_Prasinophyceae"),]
Prasinophyceae_tb_occur <- Prasinophyceae_tb[,1:43]
length(Prasinophyceae_tb_occur[,colSums(Prasinophyceae_tb_occur) > 0])
## [1] 32
Chrysophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Chrysophyceae"),]
Chrysophyceae_tb_occur <- Chrysophyceae_tb[,1:43]
length(Chrysophyceae_tb_occur[,colSums(Chrysophyceae_tb_occur) > 0])
## [1] 41
Pelagophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Pelagophyceae"),]
Pelagophyceae_tb_occur <- Pelagophyceae_tb[,1:43]
length(Pelagophyceae_tb_occur[,colSums(Pelagophyceae_tb_occur) > 0])
## [1] 43
Dictyochophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Dictyochophyceae"),]
Dictyochophyceae_tb_occur <- Dictyochophyceae_tb[,1:43]
length(Dictyochophyceae_tb_occur[,colSums(Dictyochophyceae_tb_occur) > 0])
## [1] 43
Cryptomonadales_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Cryptophyceae"),]
Cryptomonadales_tb_occur <- Cryptomonadales_tb[,1:43]
length(Cryptomonadales_tb_occur[,colSums(Cryptomonadales_tb_occur) > 0])
## [1] 27
Bacillariophyta_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Bacillariophyceae"),]
Bacillariophyta_tb_occur <- Bacillariophyta_tb[,1:43]
length(Bacillariophyta_tb_occur[,colSums(Bacillariophyta_tb_occur) > 0])
## [1] 39
Chlorarachniophyta_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Chlorarachniophyceae"),]
Chlorarachniophyta_tb_occur <- Chlorarachniophyta_tb[,1:43]
length(Chlorarachniophyta_tb_occur[,colSums(Chlorarachniophyta_tb_occur) > 0])
## [1] 34
Bolidophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Bolidophyceae"),]
Bolidophyceae_tb_occur <- Bolidophyceae_tb[,1:43]
length(Bolidophyceae_tb_occur[,colSums(Bolidophyceae_tb_occur) > 0])
## [1] 35
Pinguiochysidales_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Pinguiophyceae"),]
Pinguiochysidales_tb_occur <- Pinguiochysidales_tb[,1:43]
length(Pinguiochysidales_tb_occur[,colSums(Pinguiochysidales_tb_occur) > 0])
## [1] 0
Prymnesiophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Prymnesiophyceae"),]
Prymnesiophyceae_tb_occur <- Prymnesiophyceae_tb[,1:43]
length(Prymnesiophyceae_tb_occur[,colSums(Prymnesiophyceae_tb_occur) > 0])
## [1] 43
Mamiellophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Mamiellophyceae"),]
Mamiellophyceae_tb_occur <- Mamiellophyceae_tb[,1:43]
length(Mamiellophyceae_tb_occur[,colSums(Mamiellophyceae_tb_occur) > 0])
## [1] 39
Eustigmatales_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Eustigmatophyceae"),]
Eustigmatales_tb_occur <- Eustigmatales_tb[,1:43]
length(Eustigmatales_tb_occur[,colSums(Eustigmatales_tb_occur) > 0])
## [1] 35
Chlorophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Chlorophyceae"),]
Chlorophyceae_tb_occur <- Chlorophyceae_tb[,1:43]
length(Chlorophyceae_tb_occur[,colSums(Chlorophyceae_tb_occur) > 0])
## [1] 18
Ulvophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Ulvophyceae"),]
Ulvophyceae_tb_occur <- Ulvophyceae_tb[,1:43]
length(Ulvophyceae_tb_occur[,colSums(Ulvophyceae_tb_occur) > 0])
## [1] 2
Raphydophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Raphydophyceae"),]
Raphydophyceae_tb_occur <- Raphydophyceae_tb[,1:43]
length(Raphydophyceae_tb_occur[,colSums(Raphydophyceae_tb_occur) > 0])
## [1] 0
Trebouxiophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Trebouxiophyceae"),]
Trebouxiophyceae_tb_occur <- Trebouxiophyceae_tb[,1:43]
length(Trebouxiophyceae_tb_occur[,colSums(Trebouxiophyceae_tb_occur) > 0])
## [1] 14
Phaeophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Phaeophyceae"),]
Phaeophyceae_tb_occur <- Phaeophyceae_tb[,1:43]
length(Phaeophyceae_tb_occur[,colSums(Phaeophyceae_tb_occur) > 0])
## [1] 2
Phaeothamniophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Phaeothamniophyceae"),]
Phaeothamniophyceae_tb_occur <- Phaeothamniophyceae_tb[,1:43]
length(Phaeothamniophyceae_tb_occur[,colSums(Phaeothamniophyceae_tb_occur) > 0])
## [1] 0
Xanthophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Xanthophyceae"),]
Xanthophyceae_tb_occur <- Xanthophyceae_tb[,1:43]
length(Xanthophyceae_tb_occur[,colSums(Xanthophyceae_tb_occur) > 0])
## [1] 5
Chlorodendrophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Chlorodendrophyceae"),]
Chlorodendrophyceae_tb_occur <- Chlorodendrophyceae_tb[,1:43]
length(Chlorodendrophyceae_tb_occur[,colSums(Chlorodendrophyceae_tb_occur) > 0])
## [1] 0
IncertaeSedis_Archaeplastida_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "IncertaeSedis_Archaeplastida"),]
IncertaeSedis_Archaeplastida_tb_occur <- IncertaeSedis_Archaeplastida_tb[,1:43]
length(IncertaeSedis_Archaeplastida_tb_occur[,colSums(IncertaeSedis_Archaeplastida_tb_occur) > 0])
## [1] 0
Nephroselmidophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Nephroselmidophyceae"),]
Nephroselmidophyceae_tb_occur <- Nephroselmidophyceae_tb[,1:43]
length(Nephroselmidophyceae_tb_occur[,colSums(Nephroselmidophyceae_tb_occur) > 0])
## [1] 12
Pavlovophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Pavlovophyceae"),]
Pavlovophyceae_tb_occur <- Pavlovophyceae_tb[,1:43]
length(Pavlovophyceae_tb_occur[,colSums(Pavlovophyceae_tb_occur) > 0])
## [1] 25
Rhodophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Rhodophyceae"),]
Rhodophyceae_tb_occur <- Rhodophyceae_tb[,1:43]
length(Rhodophyceae_tb_occur[,colSums(Rhodophyceae_tb_occur) > 0])
## [1] 4
Rappemonads_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Rappemonads"),]
Rappemonads_tb_occur <- Rappemonads_tb[,1:43]
length(Rappemonads_tb_occur[,colSums(Rappemonads_tb_occur) > 0])
## [1] 0
MOCH_1_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "MOCH-1"),]
MOCH_1_tb_occur <- MOCH_1_tb[,1:43]
length(MOCH_1_tb_occur[,colSums(MOCH_1_tb_occur) > 0])
## [1] 0
MOCH_2_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "MOCH-2"),]
MOCH_2_tb_occur <- MOCH_2_tb[,1:43]
length(MOCH_2_tb_occur[,colSums(MOCH_2_tb_occur) > 0])
## [1] 0
MOCH_5_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "MOCH-5"),]
MOCH_5_tb_occur <- MOCH_5_tb[,1:43]
length(MOCH_5_tb_occur[,colSums(MOCH_5_tb_occur) > 0])
## [1] 0
Prasinophyceae_clade_VII_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Prasinophyceae_clade-VII"),]
Prasinophyceae_clade_VII_tb_occur <- Prasinophyceae_clade_VII_tb[,1:43]
length(Prasinophyceae_clade_VII_tb_occur[,colSums(Prasinophyceae_clade_VII_tb_occur) > 0])
## [1] 36
Prasinophyceae_clade_IX_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Prasinophyceae_clade-IX"),]
Prasinophyceae_clade_IX_tb_occur <- Prasinophyceae_clade_IX_tb[,1:43]
length(Prasinophyceae_clade_IX_tb_occur[,colSums(Prasinophyceae_clade_IX_tb_occur) > 0])
## [1] 16
Pyramimonadaceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Pyramimonadaceae"),]
Pyramimonadaceae_tb_occur <- Pyramimonadaceae_tb[,1:43]
length(Pyramimonadaceae_tb_occur[,colSums(Pyramimonadaceae_tb_occur) > 0])
## [1] 21
##                          reads_per_class OTUs_per_class samples_per_class
## Dinophyceae                         5453           1044                43
## Prymnesiophyceae                    5180            165                43
## Mamiellophyceae                     3173            102                39
## Pelagophyceae                       2206             23                43
## Prasinophyceae_clade-VII            1367             30                36
## Dictyochophyceae                     622             48                43
## Chrysophyceae                        599            134                41
## Cryptophyceae                        380             57                27
## Bacillariophyceae                    287            125                39
## Chlorarachniophyceae                 136             16                34
## Bolidophyceae                        132             21                35
## Pyramimonadaceae                     111             14                21
## other_Prasinophyceae                  93             19                32
## Eustigmatophyceae                     88             19                35
## Prasinophyceae_clade-IX               53             13                16
## Pavlovophyceae                        51              5                25
## Trebouxiophyceae                      39             30                14
## Chlorophyceae                         25             20                18
## Nephroselmidophyceae                  19              7                12
## Xanthophyceae                          5              4                 5
## Rhodophyceae                           4              4                 4
## Ulvophyceae                            4              3                 2
## Phaeophyceae                           2              2                 2

1.6.1.2) Relative values

##   reads_per_class    OTUs_per_class samples_per_class 
##           100.000           100.000          1416.279
##                          reads_per_class OTUs_per_class samples_per_class
## Dinophyceae                 27.225522992     54.8031496        100.000000
## Prymnesiophyceae            25.862499376      8.6614173        100.000000
## Mamiellophyceae             15.842029058      5.3543307         90.697674
## Pelagophyceae               11.014029657      1.2073491        100.000000
## Prasinophyceae_clade-VII     6.825103600      1.5748031         83.720930
## Dictyochophyceae             3.105497029      2.5196850        100.000000
## Chrysophyceae                2.990663538      7.0341207         95.348837
## Cryptophyceae                1.897248989      2.9921260         62.790698
## Bacillariophyceae            1.432922263      6.5616798         90.697674
## Chlorarachniophyceae         0.679015428      0.8398950         79.069767
## Bolidophyceae                0.659044386      1.1023622         81.395349
## Pyramimonadaceae             0.554196415      0.7349081         48.837209
## other_Prasinophyceae         0.464326726      0.9973753         74.418605
## Eustigmatophyceae            0.439362924      0.9973753         81.395349
## Prasinophyceae_clade-IX      0.264616306      0.6824147         37.209302
## Pavlovophyceae               0.254630785      0.2624672         58.139535
## Trebouxiophyceae             0.194717659      1.5748031         32.558140
## Chlorophyceae                0.124819012      1.0498688         41.860465
## Nephroselmidophyceae         0.094862449      0.3674541         27.906977
## Xanthophyceae                0.024963802      0.2099738         11.627907
## Rhodophyceae                 0.019971042      0.2099738          9.302326
## Ulvophyceae                  0.019971042      0.1574803          4.651163
## Phaeophyceae                 0.009985521      0.1049869          4.651163



Reads per class vs. OTUs per class:



Reads per class vs. samples in which they occurr:

1.6.2) Non-rarefied data

Let’s add the taxonomic classification by merging “tb18_tax_occur_min1154_t” with “tb18_tax”:

dim(tb18_min1154_tax_sorted)
## [1] 8349   46
tb18_min1154_tax_sorted[1:5,1:5]
##       TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1                  0                  0                  0
## OTU_2                  0                  0                  0
## OTU_3                  0                  0                  0
## OTU_4                  0                  0                  0
## OTU_5                  0                  0                  0
##       TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1                  0                  0
## OTU_2                  0                  0
## OTU_3                  1                  0
## OTU_4                  0                  0
## OTU_5                  0                  0

Selection of phototrophs:

## [1] 2882   46
#create a table per group and count in how many samples they occur. 
Dinophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Dinophyceae"),]
Dinophyceae_tb[1:5,1:5]
##        TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_14                  0                  0                  0
## OTU_19                  0                  0                  0
## OTU_29                  0                  1                  0
## OTU_34                  0                  0                  0
## OTU_72                  0                  0                  0
##        TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_14                  0                  0
## OTU_19                  0                  0
## OTU_29                  0                  0
## OTU_34                  0                  0
## OTU_72                  0                  2
Dinophyceae_tb_occur <- Dinophyceae_tb[,1:43]
Dinophyceae_tb_occur[1:5,1:5]
##        TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_14                  0                  0                  0
## OTU_19                  0                  0                  0
## OTU_29                  0                  1                  0
## OTU_34                  0                  0                  0
## OTU_72                  0                  0                  0
##        TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_14                  0                  0
## OTU_19                  0                  0
## OTU_29                  0                  0
## OTU_34                  0                  0
## OTU_72                  0                  2
dim(Dinophyceae_tb_occur)
## [1] 1476   43
length(Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0])
## [1] 43
#Dinophyceae_tb_samples <- Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0]
#length(Dinophyceae_tb_samples[which(colSums(Dinophyceae_tb_occur) != 0)])

Prasinophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "other_Prasinophyceae"),]
Prasinophyceae_tb_occur <- Prasinophyceae_tb[,1:43]
length(Prasinophyceae_tb_occur[,colSums(Prasinophyceae_tb_occur) > 0])
## [1] 41
Chrysophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Chrysophyceae"),]
Chrysophyceae_tb_occur <- Chrysophyceae_tb[,1:43]
length(Chrysophyceae_tb_occur[,colSums(Chrysophyceae_tb_occur) > 0])
## [1] 43
Pelagophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Pelagophyceae"),]
Pelagophyceae_tb_occur <- Pelagophyceae_tb[,1:43]
length(Pelagophyceae_tb_occur[,colSums(Pelagophyceae_tb_occur) > 0])
## [1] 43
Dictyochophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Dictyochophyceae"),]
Dictyochophyceae_tb_occur <- Dictyochophyceae_tb[,1:43]
length(Dictyochophyceae_tb_occur[,colSums(Dictyochophyceae_tb_occur) > 0])
## [1] 43
Cryptomonadales_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Cryptophyceae"),]
Cryptomonadales_tb_occur <- Cryptomonadales_tb[,1:43]
length(Cryptomonadales_tb_occur[,colSums(Cryptomonadales_tb_occur) > 0])
## [1] 39
Bacillariophyta_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Bacillariophyceae"),]
Bacillariophyta_tb_occur <- Bacillariophyta_tb[,1:43]
length(Bacillariophyta_tb_occur[,colSums(Bacillariophyta_tb_occur) > 0])
## [1] 42
Chlorarachniophyta_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Chlorarachniophyceae"),]
Chlorarachniophyta_tb_occur <- Chlorarachniophyta_tb[,1:43]
length(Chlorarachniophyta_tb_occur[,colSums(Chlorarachniophyta_tb_occur) > 0])
## [1] 41
Bolidophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Bolidophyceae"),]
Bolidophyceae_tb_occur <- Bolidophyceae_tb[,1:43]
length(Bolidophyceae_tb_occur[,colSums(Bolidophyceae_tb_occur) > 0])
## [1] 43
Pinguiochysidales_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Pinguiophyceae"),]
Pinguiochysidales_tb_occur <- Pinguiochysidales_tb[,1:43]
length(Pinguiochysidales_tb_occur[,colSums(Pinguiochysidales_tb_occur) > 0])
## [1] 0
Prymnesiophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Prymnesiophyceae"),]
Prymnesiophyceae_tb_occur <- Prymnesiophyceae_tb[,1:43]
length(Prymnesiophyceae_tb_occur[,colSums(Prymnesiophyceae_tb_occur) > 0])
## [1] 43
Mamiellophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Mamiellophyceae"),]
Mamiellophyceae_tb_occur <- Mamiellophyceae_tb[,1:43]
length(Mamiellophyceae_tb_occur[,colSums(Mamiellophyceae_tb_occur) > 0])
## [1] 41
Eustigmatales_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Eustigmatophyceae"),]
Eustigmatales_tb_occur <- Eustigmatales_tb[,1:43]
length(Eustigmatales_tb_occur[,colSums(Eustigmatales_tb_occur) > 0])
## [1] 42
Chlorophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Chlorophyceae"),]
Chlorophyceae_tb_occur <- Chlorophyceae_tb[,1:43]
length(Chlorophyceae_tb_occur[,colSums(Chlorophyceae_tb_occur) > 0])
## [1] 26
Ulvophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Ulvophyceae"),]
Ulvophyceae_tb_occur <- Ulvophyceae_tb[,1:43]
length(Ulvophyceae_tb_occur[,colSums(Ulvophyceae_tb_occur) > 0])
## [1] 3
Raphydophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Raphydophyceae"),]
Raphydophyceae_tb_occur <- Raphydophyceae_tb[,1:43]
length(Raphydophyceae_tb_occur[,colSums(Raphydophyceae_tb_occur) > 0])
## [1] 0
Trebouxiophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Trebouxiophyceae"),]
Trebouxiophyceae_tb_occur <- Trebouxiophyceae_tb[,1:43]
length(Trebouxiophyceae_tb_occur[,colSums(Trebouxiophyceae_tb_occur) > 0])
## [1] 26
Phaeophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Phaeophyceae"),]
Phaeophyceae_tb_occur <- Phaeophyceae_tb[,1:43]
length(Phaeophyceae_tb_occur[,colSums(Phaeophyceae_tb_occur) > 0])
## [1] 4
Phaeothamniophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Phaeothamniophyceae"),]
Phaeothamniophyceae_tb_occur <- Phaeothamniophyceae_tb[,1:43]
length(Phaeothamniophyceae_tb_occur[,colSums(Phaeothamniophyceae_tb_occur) > 0])
## [1] 1
Xanthophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Xanthophyceae"),]
Xanthophyceae_tb_occur <- Xanthophyceae_tb[,1:43]
length(Xanthophyceae_tb_occur[,colSums(Xanthophyceae_tb_occur) > 0])
## [1] 14
Chlorodendrophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Chlorodendrophyceae"),]
Chlorodendrophyceae_tb_occur <- Chlorodendrophyceae_tb[,1:43]
length(Chlorodendrophyceae_tb_occur[,colSums(Chlorodendrophyceae_tb_occur) > 0])
## [1] 0
IncertaeSedis_Archaeplastida_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "IncertaeSedis_Archaeplastida"),]
IncertaeSedis_Archaeplastida_tb_occur <- IncertaeSedis_Archaeplastida_tb[,1:43]
length(IncertaeSedis_Archaeplastida_tb_occur[,colSums(IncertaeSedis_Archaeplastida_tb_occur) > 0])
## [1] 1
Nephroselmidophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Nephroselmidophyceae"),]
Nephroselmidophyceae_tb_occur <- Nephroselmidophyceae_tb[,1:43]
length(Nephroselmidophyceae_tb_occur[,colSums(Nephroselmidophyceae_tb_occur) > 0])
## [1] 19
Pavlovophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Pavlovophyceae"),]
Pavlovophyceae_tb_occur <- Pavlovophyceae_tb[,1:43]
length(Pavlovophyceae_tb_occur[,colSums(Pavlovophyceae_tb_occur) > 0])
## [1] 37
Rhodophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Rhodophyceae"),]
Rhodophyceae_tb_occur <- Rhodophyceae_tb[,1:43]
length(Rhodophyceae_tb_occur[,colSums(Rhodophyceae_tb_occur) > 0])
## [1] 13
Rappemonads_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Rappemonads"),]
Rappemonads_tb_occur <- Rappemonads_tb[,1:43]
length(Rappemonads_tb_occur[,colSums(Rappemonads_tb_occur) > 0])
## [1] 0
MOCH_1_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "MOCH-1"),]
MOCH_1_tb_occur <- MOCH_1_tb[,1:43]
length(MOCH_1_tb_occur[,colSums(MOCH_1_tb_occur) > 0])
## [1] 0
MOCH_2_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "MOCH-2"),]
MOCH_2_tb_occur <- MOCH_2_tb[,1:43]
length(MOCH_2_tb_occur[,colSums(MOCH_2_tb_occur) > 0])
## [1] 0
MOCH_5_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "MOCH-5"),]
MOCH_5_tb_occur <- MOCH_5_tb[,1:43]
length(MOCH_5_tb_occur[,colSums(MOCH_5_tb_occur) > 0])
## [1] 0
Prasinophyceae_clade_VII_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Prasinophyceae_clade-VII"),]
Prasinophyceae_clade_VII_tb_occur <- Prasinophyceae_clade_VII_tb[,1:43]
length(Prasinophyceae_clade_VII_tb_occur[,colSums(Prasinophyceae_clade_VII_tb_occur) > 0])
## [1] 42
Prasinophyceae_clade_IX_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Prasinophyceae_clade-IX"),]
Prasinophyceae_clade_IX_tb_occur <- Prasinophyceae_clade_IX_tb[,1:43]
length(Prasinophyceae_clade_IX_tb_occur[,colSums(Prasinophyceae_clade_IX_tb_occur) > 0])
## [1] 29
Pyramimonadaceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Pyramimonadaceae"),]
Pyramimonadaceae_tb_occur <- Pyramimonadaceae_tb[,1:43]
length(Pyramimonadaceae_tb_occur[,colSums(Pyramimonadaceae_tb_occur) > 0])
## [1] 33

1.6.2.1) Absolute values

##                      reads_per_class OTUs_per_class
## Bacillariophyceae               2664            374
## Bolidophyceae                    509             25
## Chlorarachniophyceae             327             20
## Chlorophyceae                     79             45
## Chrysophyceae                   2824            240
##                              reads_per_class OTUs_per_class
## Prymnesiophyceae                       34758            177
## Dinophyceae                            23637           1476
## Mamiellophyceae                        15846            117
## Pelagophyceae                           9306             25
## Prasinophyceae_clade-VII                3575             37
## Chrysophyceae                           2824            240
## Dictyochophyceae                        2819             52
## Bacillariophyceae                       2664            374
## Cryptophyceae                           1922             95
## Bolidophyceae                            509             25
## Pyramimonadaceae                         507             19
## other_Prasinophyceae                     492             26
## Eustigmatophyceae                        359             27
## Chlorarachniophyceae                     327             20
## Pavlovophyceae                           228             13
## Prasinophyceae_clade-IX                  155             16
## Trebouxiophyceae                         121             57
## Chlorophyceae                             79             45
## Nephroselmidophyceae                      64              7
## Xanthophyceae                             21             11
## Rhodophyceae                              16             12
## Ulvophyceae                                6              5
## Phaeophyceae                               5              4
## IncertaeSedis_Archaeplastida               1              1
## Phaeothamniophyceae                        1              1
##                              samples_per_class
## Prymnesiophyceae                            43
## Dinophyceae                                 43
## Mamiellophyceae                             41
## Pelagophyceae                               43
## Prasinophyceae_clade-VII                    42
## Chrysophyceae                               43
## Dictyochophyceae                            43
## Bacillariophyceae                           42
## Cryptophyceae                               39
## Bolidophyceae                               42
## Pyramimonadaceae                            33
## other_Prasinophyceae                        41
## Eustigmatophyceae                           42
## Chlorarachniophyceae                        41
## Pavlovophyceae                              37
## Prasinophyceae_clade-IX                     29
## Trebouxiophyceae                            26
## Chlorophyceae                               26
## Nephroselmidophyceae                        19
## Xanthophyceae                               14
## Rhodophyceae                                13
## Ulvophyceae                                  3
## Phaeophyceae                                 4
## IncertaeSedis_Archaeplastida                 2
## Phaeothamniophyceae                          1

1.6.2.2) Relative values

##   reads_per_class    OTUs_per_class samples_per_class 
##           100.000           100.000          1748.837
##                              reads_per_class OTUs_per_class
## Prymnesiophyceae                3.467409e+01     6.14156836
## Dinophyceae                     2.357994e+01    51.21443442
## Mamiellophyceae                 1.580775e+01     4.05968078
## Pelagophyceae                   9.283534e+00     0.86745316
## Prasinophyceae_clade-VII        3.566369e+00     1.28383067
## Chrysophyceae                   2.817182e+00     8.32755031
## Dictyochophyceae                2.812194e+00     1.80430257
## Bacillariophyceae               2.657569e+00    12.97709924
## Cryptophyceae                   1.917360e+00     3.29632200
## Bolidophyceae                   5.077712e-01     0.86745316
## Pyramimonadaceae                5.057760e-01     0.65926440
## other_Prasinophyceae            4.908122e-01     0.90215128
## Eustigmatophyceae               3.581333e-01     0.93684941
## Chlorarachniophyceae            3.262106e-01     0.69396253
## Pavlovophyceae                  2.274496e-01     0.45107564
## Prasinophyceae_clade-IX         1.546258e-01     0.55517002
## Trebouxiophyceae                1.207079e-01     1.97779320
## Chlorophyceae                   7.880928e-02     1.56141568
## Nephroselmidophyceae            6.384549e-02     0.24288688
## Xanthophyceae                   2.094930e-02     0.38167939
## Rhodophyceae                    1.596137e-02     0.41637752
## Ulvophyceae                     5.985515e-03     0.17349063
## Phaeophyceae                    4.987929e-03     0.13879251
## IncertaeSedis_Archaeplastida    9.975858e-04     0.03469813
## Phaeothamniophyceae             9.975858e-04     0.03469813
##                              samples_per_class
## Prymnesiophyceae                    100.000000
## Dinophyceae                         100.000000
## Mamiellophyceae                      95.348837
## Pelagophyceae                       100.000000
## Prasinophyceae_clade-VII             97.674419
## Chrysophyceae                       100.000000
## Dictyochophyceae                    100.000000
## Bacillariophyceae                    97.674419
## Cryptophyceae                        90.697674
## Bolidophyceae                        97.674419
## Pyramimonadaceae                     76.744186
## other_Prasinophyceae                 95.348837
## Eustigmatophyceae                    97.674419
## Chlorarachniophyceae                 95.348837
## Pavlovophyceae                       86.046512
## Prasinophyceae_clade-IX              67.441860
## Trebouxiophyceae                     60.465116
## Chlorophyceae                        60.465116
## Nephroselmidophyceae                 44.186047
## Xanthophyceae                        32.558140
## Rhodophyceae                         30.232558
## Ulvophyceae                           6.976744
## Phaeophyceae                          9.302326
## IncertaeSedis_Archaeplastida          4.651163
## Phaeothamniophyceae                   2.325581



Reads per class vs. OTUs per class:



Reads OTUs per class vs. samples in which they occurr:

2) 16S amplicons

2.1) Data overview

Let’s read the dataset and remove the samples containing less than 36155 reads:

Table dimensions and content outline:

## [1] 28294    43
##       TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1                  0                  1                  0
## OTU_2                  0                  0                  0
## OTU_3                  0                  0                  0
## OTU_4                  2                  3                  1
## OTU_5                  0                  0                  0
##       TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1                  0                  1
## OTU_2                  0                  0
## OTU_3                  0                  0
## OTU_4                  5                  2
## OTU_5                  0                  0

Minimum number of reads per station:

min(colSums(tb16_tax_occur_min36155)) 
## [1] 36155

Maximum number of reads per station:

max(colSums(tb16_tax_occur_min36155)) 
## [1] 158933

Identification of stations with higher number of reads:

amplicons_per_sample<-colSums(tb16_tax_occur_min36155)
amplicons_per_sample[which(colSums(tb16_tax_occur_min36155)>150000)]
## TARA_64_SUR_0d2_3 TARA_85_SUR_0d2_3 
##            158933            150654

Overall reads per sample:

2.2) Normalization

Let’s normalize the original dataset by randomly subsampling 36155 reads in each station:

tb16_tax_occur_min36155_t<-t(tb16_tax_occur_min36155)
tb16_tax_occur_ss36155<-rrarefy(tb16_tax_occur_min36155_t, 36155)

The normalized table shows the following dimensions and format:

## [1]    43 28294
##                    OTU_1 OTU_2 OTU_3 OTU_4 OTU_5
## TARA_102_SUR_0d2_3     0     0     0     1     0
## TARA_109_SUR_0d2_3     1     0     0     2     0
## TARA_110_SUR_0d2_3     0     0     0     1     0
## TARA_111_SUR_0d2_3     0     0     0     1     0
## TARA_112_SUR_0d2_3     0     0     0     0     0

Its content fits with the expected normalization values (36155 reads per station):

rowSums(tb16_tax_occur_ss36155)
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3 
##              36155              36155              36155 
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3 TARA_122_SUR_0d2_3 
##              36155              36155              36155 
## TARA_123_SUR_0d2_3 TARA_124_SUR_0d2_3 TARA_125_SUR_0d2_3 
##              36155              36155              36155 
## TARA_128_SUR_0d2_3 TARA_132_SUR_0d2_3 TARA_133_SUR_0d2_3 
##              36155              36155              36155 
## TARA_137_SUR_0d2_3 TARA_138_SUR_0d2_3 TARA_140_SUR_0d2_3 
##              36155              36155              36155 
## TARA_142_SUR_0d2_3 TARA_145_SUR_0d2_3 TARA_146_SUR_0d2_3 
##              36155              36155              36155 
## TARA_148_SUR_0d2_3 TARA_149_SUR_0d2_3 TARA_150_SUR_0d2_3 
##              36155              36155              36155 
## TARA_151_SUR_0d2_3 TARA_152_SUR_0d2_3  TARA_56_SUR_0d2_3 
##              36155              36155              36155 
##  TARA_57_SUR_0d2_3  TARA_62_SUR_0d2_3  TARA_64_SUR_0d2_3 
##              36155              36155              36155 
##  TARA_65_SUR_0d2_3  TARA_66_SUR_0d2_3  TARA_68_SUR_0d2_3 
##              36155              36155              36155 
##  TARA_72_SUR_0d2_3  TARA_76_SUR_0d2_3  TARA_78_SUR_0d2_3 
##              36155              36155              36155 
##  TARA_82_SUR_0d2_3  TARA_84_SUR_0d2_3  TARA_85_SUR_0d2_3 
##              36155              36155              36155 
##  TARA_96_SUR_0d2_3  TARA_98_SUR_0d2_3  TARA_99_SUR_0d2_3 
##              36155              36155              36155 
##             MP0311             MP1517             MP1672 
##              36155              36155              36155 
##             MP2821 
##              36155

Let’s check out how many OTUs don’t appear in the new table:

length(which(colSums(tb16_tax_occur_ss36155)==0))
## [1] 8568

There are 8045 OTUs that don’t show any occurrence in the normalized data. Let’s remove them from the table and take a look at its final dimensions:

tb16_tax_occur_ss36155_no_cero<-tb16_tax_occur_ss36155[,-(which(colSums(tb16_tax_occur_ss36155)==0))]
tb16_tax_occur_ss36155_no_cero<-tb16_tax_occur_ss36155_no_cero[mixedorder(row.names(tb16_tax_occur_ss36155_no_cero)),]
dim(tb16_tax_occur_ss36155_no_cero)
## [1]    43 19726

Datasets summary:

dim(tb16_tax)
## [1] 28294    58
dim(tb16_tax_occur)
## [1] 28294    55
dim(tb16_tax_occur_ss36155_no_cero)
## [1]    43 19726
#28294    58
#28294    55
#43 19726

2.3) General community analysis

2.3.1) Richness and evenness (Shannon index)

Most of the samples take Shannon Index values around 6:

2.3.2) Richness: OTU number

Lowest number of OTUs per sample:

## [1] 2503

Maximum number of OTUs per sample:

## [1] 5092

In most of the samples, we can identify between 600 and 700 OTUs:

plot(sort(OTUs_per_sample_16S_tax_occur_ss36155), pch=19)

boxplot(OTUs_per_sample_16S_tax_occur_ss36155, pch=19)

2.3.3) Index of evenness

2.3.3.1) Pielou’s index

pielou_evenness_16S_tax_occur_ss36155<-tb16_tax_occur_ss36155_div/log(OTUs_per_sample_16S_tax_occur_ss36155)

The Pielou index (constrained between 0 and 1) takes values closer to 1 as the variation of species proportion in a sample increases. Most of the samples get values around 0.95, meaning that the numerical composition of different OTUs in a sample is highly variable - there’s no constant presence of dominant species.

The less variation in communities between the species (and the presence of a dominant specie), the lower J’ is.

plot(sort(pielou_evenness_16S_tax_occur_ss36155), pch=19)

boxplot(pielou_evenness_16S_tax_occur_ss36155, pch=19)

The OTU_38, with 874 reads, is the most abundant in the overall dataset:

head(sort(colSums(tb16_tax_occur_ss36155_no_cero), decreasing=T), n=10L)
## OTU_1754 OTU_2893  OTU_761 OTU_2428 OTU_3736 OTU_1942 OTU_2466 OTU_3271 
##    17121    13628    13145    12010    11106    10244    10043     8626 
## OTU_5398 OTU_5330 
##     8526     7736

Most of the OTUs show very few occurrences; suggesting that we will probably be able to identify a significant ammount of rare otus:

plot(log(sort(colSums(tb16_tax_occur_ss36155_no_cero), decreasing=T)), pch=19)

2.3.4) Abundance Models

2.3.4.1) Rank-Abundance or Dominance/Diversity Model (“radfit”)

The OTUs abundance distribution fits relativelly close to log-normal model.

2.3.4.2) Preston’s Lognormal Model

According to Preston’s lognormal model fit into species frequencies groups, we’re missing 1588.816 species:

tb16_tax_occur_ss36155_prestonfit<-prestonfit(colSums(tb16_tax_occur_min36155_t))
plot(tb16_tax_occur_ss36155_prestonfit, main="Pooled species")

veiledspec(tb16_tax_occur_ss36155_prestonfit)
## Extrapolated     Observed       Veiled 
##     108452.1      24691.0      83761.1

When computing Prestons’ lognormal model fit without pooling data into groups, we seem to miss 1385.371 species:

## Extrapolated     Observed       Veiled 
##     75267.64     24691.00     50576.64

2.3.5) Rarefaction curves of rarefied and non-rarefied datasets

rarec_input<-t(as.matrix(colSums(tb16_tax_occur_ss36155_no_cero)))

#tb16_rarecurve_step1000_10000<-rarecurve(rarec_input, step = 1000, 10000, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="16S amplicons diversity step=1000 & ss=10000\n(40,816 OTUs; 5,247,375 reads)\n")
#tb16_rarecurve_step1000_10000

tb16_rarecurve_step100_5700<-rarecurve(rarec_input, step = 1000, 19700, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="16S amplicons diversity step=100 & ss=5700\n(19,756 OTUs; 1,554,665 reads)\n")

#without rarefying
rarec_allOTUs_input<-t(as.matrix(colSums(t(tb16_tax_occur))))
tb16_rarecurve_allOTUs_step100_8642<-rarecurve(rarec_allOTUs_input, step = 1000, 28200, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="16S amplicons diversity non-rarefied step=1000 & ss=100000\n(28,294 OTUs; 4,285,523 reads)\n")

2.3.6) Beta diversity

2.3.6.1) Dissimilarity matrix using Bray-Curtis index:

The Bray-Curtis dissimilarity, constrained between 0 (minimum distance) and 1 (highest dissimilarity) allows us to quantify the differences between samples according to the composition and relative abundance of their OTUs. In our dataset, most of the samples pairs take dissimilarity values around 0.8, meaning that their composition is substantially different.

2.3.6.2) Hierarchical clustering

The stations seem to form clusters according to geographic localization, but there are no evident clusters separated from the general groups, apart from the one containing TARA_84, TARA_82 and TARA_85.

(To be done: assign Longhurst provinces information to each station and check if any of the central clusters is meaningful regarding to the samples’ geographical location)

2.3.6.3) Non-metric multidimensional scaling

We can identify a prominent group in the central part of the NMDS plot and a few outliers (TARA 82, 84 and 85) in the central-right edge of the plot. The stress parameter takes a value below 0.2, suggesting that the plot is acceptable.

## 
## Call:
## monoMDS(dist = tb16_tax_occur_ss36155_no_cero.bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 43 points, dissimilarity 'bray', call 'vegdist(x = tb16_tax_occur_ss36155_no_cero, method = "bray")'
## 
## Dimensions: 2 
## Stress:     9.589501e-05 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 62 iterations: Stress nearly zero (< smin)

When implementing a most robut function for computing NMDS plots, the result is quiet the same:

## Run 0 stress 9.301068e-05 
## Run 1 stress 9.158128e-05 
## ... New best solution
## ... Procrustes: rmse 5.375076e-05  max resid 0.0002756587 
## ... Similar to previous best
## Run 2 stress 9.428448e-05 
## ... Procrustes: rmse 2.781509e-05  max resid 0.0001387135 
## ... Similar to previous best
## Run 3 stress 9.332153e-05 
## ... Procrustes: rmse 0.0001081792  max resid 0.0003003597 
## ... Similar to previous best
## Run 4 stress 9.620788e-05 
## ... Procrustes: rmse 7.880078e-05  max resid 0.0002958135 
## ... Similar to previous best
## Run 5 stress 7.292808e-05 
## ... New best solution
## ... Procrustes: rmse 8.029629e-05  max resid 0.0002395264 
## ... Similar to previous best
## Run 6 stress 9.457543e-05 
## ... Procrustes: rmse 9.02236e-05  max resid 0.0002043675 
## ... Similar to previous best
## Run 7 stress 9.195291e-05 
## ... Procrustes: rmse 7.181292e-05  max resid 0.0001653436 
## ... Similar to previous best
## Run 8 stress 9.669817e-05 
## ... Procrustes: rmse 8.197419e-05  max resid 0.0002534159 
## ... Similar to previous best
## Run 9 stress 9.961385e-05 
## ... Procrustes: rmse 5.636359e-05  max resid 0.0001619826 
## ... Similar to previous best
## Run 10 stress 9.64433e-05 
## ... Procrustes: rmse 8.938231e-05  max resid 0.0002629114 
## ... Similar to previous best
## Run 11 stress 7.614704e-05 
## ... Procrustes: rmse 7.66202e-05  max resid 0.0002242664 
## ... Similar to previous best
## Run 12 stress 9.185541e-05 
## ... Procrustes: rmse 5.891344e-05  max resid 0.0002376396 
## ... Similar to previous best
## Run 13 stress 7.979814e-05 
## ... Procrustes: rmse 7.351333e-05  max resid 0.0001877524 
## ... Similar to previous best
## Run 14 stress 9.896201e-05 
## ... Procrustes: rmse 9.046256e-05  max resid 0.0002714003 
## ... Similar to previous best
## Run 15 stress 9.814128e-05 
## ... Procrustes: rmse 2.413431e-05  max resid 7.570832e-05 
## ... Similar to previous best
## Run 16 stress 9.22882e-05 
## ... Procrustes: rmse 8.985092e-05  max resid 0.0002411534 
## ... Similar to previous best
## Run 17 stress 9.977628e-05 
## ... Procrustes: rmse 9.592451e-05  max resid 0.000260223 
## ... Similar to previous best
## Run 18 stress 9.807055e-05 
## ... Procrustes: rmse 8.39958e-05  max resid 0.00019424 
## ... Similar to previous best
## Run 19 stress 9.558017e-05 
## ... Procrustes: rmse 3.817104e-05  max resid 0.0001220175 
## ... Similar to previous best
## Run 20 stress 9.839229e-05 
## ... Procrustes: rmse 4.602517e-05  max resid 0.0001242549 
## ... Similar to previous best
## *** Solution reached
## Warning in metaMDS(tb16_tax_occur_ss36155_no_cero.bray): Stress is (nearly)
## zero - you may have insufficient data
## Warning in ordiplot(x, choices = choices, type = type, display = display, :
## Species scores not available

2.4) Geographical analysis

## Warning in if (class(lats) == "SpatialPoints") lats <- coordinates(lats):
## the condition has length > 1 and only the first element will be used

Working datasets:

  1. Community matrix: tb16_tax_occur_ss36155_no_cero
dim(tb16_tax_occur_ss36155_no_cero)
## [1]    43 19763
tb16_tax_occur_ss36155_no_cero[1:5, 1:5]
##                    OTU_1 OTU_3 OTU_4 OTU_5 OTU_7
## MP0311                 0     0     0     0     0
## MP1517                 0     0     3     0     1
## MP1672                 0     0     0     0     0
## MP2821                 1     0     0     0     1
## TARA_102_SUR_0d2_3     0     0     1     0     0
  1. Community Bray-Curtis: tb16_tax_occur_ss36155_no_cero.bray
tb16_tax_occur_ss36155_no_cero.bray<-as.matrix(tb16_tax_occur_ss36155_no_cero.bray)
## [1] 43 43
  1. Stations distances in km: geo_distances_MP_16S
dim(geo_distances_MP_16S)
## [1] 43 43

Communities quickly change their composition across geographical distances:

plot(geo_distances_MP_16S, tb16_tax_occur_ss36155_no_cero.bray, pch=19, cex=0.4, xlab="Geopgraphical distances", ylab="Bray-Curtis dissimilarities")

2.4.1) Mantel correlograms

Mantel statistic is -significantlly- so low, meaning that the correlation between samples dissimilarity and geographical distances is weak.

mantel(geo_distances_MP_16S, tb16_tax_occur_ss36155_no_cero.bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = geo_distances_MP_16S, ydis = tb16_tax_occur_ss36155_no_cero.bray) 
## 
## Mantel statistic r: 0.02826 
##       Significance: 0.279 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0873 0.1167 0.1444 0.1697 
## Permutation: free
## Number of permutations: 999

Maximum distance between samples:

## [1] 19543.94

Minimum distance between samples:

## [1] 0

Correlograms:

MP_16s_ss36155_mantel_correl_by_1000km<-mantel.correlog(tb16_tax_occur_ss36155_no_cero.bray, D.geo=geo_distances_MP_16S, break.pts=seq(0,20000, by=1000))
plot(MP_16s_ss36155_mantel_correl_by_1000km)

MP_16s_ss36155_mantel_correl_by_100km<-mantel.correlog(tb16_tax_occur_ss36155_no_cero.bray, D.geo=geo_distances_MP_16S, break.pts=seq(0,20000, by=100))
plot(MP_16s_ss36155_mantel_correl_by_100km)

2.5) Abundance vs. occurence

In the following plot, we can appreciate the OTUs distribution according to their percentage of occurence and relative abundance. The red line keeps up OTUs that occur in more than 80% of the samples, the green line limits regionally rare OTUs (< 0.001%), and the blue one restricts regionally abundant OTUs (> 0.1%).

Regionally abundant OTUs (relative abundance over 0.1%):

tb16_class_prov<-cbind(otu_names=row.names(tb16_class),tb16_class)
##     otu_names mean_rabund perc_occur          class_B
## 1   OTU_10747 0.001144941   55.81395   other_bacteria
## 2   OTU_12129 0.001116639   65.11628    Synechococcus
## 3    OTU_1329 0.002241640   93.02326   other_bacteria
## 4    OTU_1676 0.002044814   88.37209  Prochlorococcus
## 5    OTU_1738 0.001929676   93.02326   other_bacteria
## 6    OTU_1754 0.011149026   93.02326  Prochlorococcus
## 7    OTU_1942 0.006453480   93.02326  Prochlorococcus
## 8     OTU_223 0.001027231   93.02326   other_bacteria
## 9      OTU_23 0.001443398   93.02326   other_bacteria
## 10   OTU_2428 0.007645377   93.02326   other_bacteria
## 11   OTU_2466 0.006429681   93.02326  Prochlorococcus
## 12   OTU_2789 0.001014366  100.00000   other_bacteria
## 13  OTU_27900 0.002379934  100.00000 Prymnesiophyceae
## 14  OTU_27901 0.003573117   81.39535 Prymnesiophyceae
## 15  OTU_27912 0.001820971   46.51163 Prymnesiophyceae
## 16  OTU_27927 0.003777663   48.83721  Mamiellophyceae
## 17  OTU_27934 0.001615139   95.34884 Prymnesiophyceae
## 18  OTU_27938 0.001353989   93.02326    Pelagophyceae
## 19  OTU_28017 0.001251717   79.06977    Pelagophyceae
## 20  OTU_28046 0.001222771   55.81395  Mamiellophyceae
## 21  OTU_28060 0.001001502   41.86047  Mamiellophyceae
## 22  OTU_28061 0.002150302   51.16279  Mamiellophyceae
## 23   OTU_2866 0.003430321   90.69767   other_bacteria
## 24   OTU_2877 0.002066683   95.34884   other_bacteria
## 25   OTU_2893 0.008587059   93.02326   other_bacteria
## 26     OTU_29 0.002464840  100.00000   other_bacteria
## 27   OTU_2980 0.001006648  100.00000   other_bacteria
## 28   OTU_3004 0.002071829   93.02326   other_bacteria
## 29   OTU_3023 0.001958621   93.02326   other_bacteria
## 30   OTU_3162 0.002286023   95.34884   other_bacteria
## 31   OTU_3172 0.002888082   88.37209   other_bacteria
## 32   OTU_3173 0.001013080   83.72093   other_bacteria
## 33   OTU_3187 0.003630364  100.00000   other_bacteria
## 34   OTU_3197 0.001815825   81.39535   other_bacteria
## 35   OTU_3198 0.001436966   93.02326   other_bacteria
## 36   OTU_3207 0.001030447   90.69767   other_bacteria
## 37   OTU_3214 0.001204761   93.02326   other_bacteria
## 38   OTU_3240 0.001783021   93.02326   other_bacteria
## 39   OTU_3259 0.002529805   58.13953   other_bacteria
## 40   OTU_3271 0.005586413   90.69767  Prochlorococcus
## 41   OTU_3308 0.002082121   97.67442    Synechococcus
## 42   OTU_3311 0.002843056   90.69767  Prochlorococcus
## 43   OTU_3326 0.001315396   90.69767   other_bacteria
## 44    OTU_333 0.001588123  100.00000   other_bacteria
## 45   OTU_3347 0.001297386   97.67442   other_bacteria
## 46   OTU_3680 0.001653732   93.02326   other_bacteria
## 47   OTU_3731 0.001496142  100.00000   other_bacteria
## 48   OTU_3736 0.007181611   93.02326  Prochlorococcus
## 49   OTU_3789 0.001064538  100.00000   other_bacteria
## 50   OTU_3803 0.001385507  100.00000   other_bacteria
## 51   OTU_3810 0.001757292   93.02326   other_bacteria
## 52   OTU_4036 0.001322471   97.67442   other_bacteria
## 53   OTU_4048 0.002347129  100.00000   other_bacteria
## 54   OTU_4083 0.001072257   95.34884   other_bacteria
## 55   OTU_4366 0.001663381  100.00000   other_bacteria
## 56   OTU_4560 0.001658878  100.00000   other_bacteria
## 57   OTU_4664 0.001741211  100.00000   other_bacteria
## 58   OTU_4926 0.001751503   93.02326   other_bacteria
## 59   OTU_4991 0.001551460   95.34884   other_bacteria
## 60    OTU_520 0.001058749   93.02326   other_bacteria
## 61   OTU_5211 0.001081262   95.34884   other_bacteria
## 62   OTU_5222 0.001090910   93.02326   other_bacteria
## 63   OTU_5266 0.001177102   93.02326   other_bacteria
## 64   OTU_5268 0.001193826  100.00000   other_bacteria
## 65   OTU_5288 0.001213123   90.69767  Prochlorococcus
## 66   OTU_5301 0.001794599   95.34884   other_bacteria
## 67   OTU_5327 0.001590696   93.02326   other_bacteria
## 68   OTU_5330 0.004964414   93.02326  Prochlorococcus
## 69   OTU_5352 0.003766728   58.13953   other_bacteria
## 70   OTU_5355 0.001160379  100.00000   other_bacteria
## 71   OTU_5398 0.005423033   90.69767  Prochlorococcus
## 72   OTU_5403 0.002407593   90.69767   other_bacteria
## 73   OTU_5414 0.001204118  100.00000   other_bacteria
## 74    OTU_542 0.001498072   93.02326   other_bacteria
## 75   OTU_5428 0.001070970  100.00000   other_bacteria
## 76   OTU_5432 0.002228776   93.02326   other_bacteria
## 77   OTU_5441 0.002136152   93.02326   other_bacteria
## 78   OTU_5445 0.001281948  100.00000   other_bacteria
## 79   OTU_5447 0.002478991  100.00000   other_bacteria
## 80   OTU_5449 0.001256219   93.02326   other_bacteria
## 81   OTU_5491 0.001247857  100.00000   other_bacteria
## 82   OTU_5493 0.001173886   95.34884   other_bacteria
## 83   OTU_5499 0.001057463   93.02326   other_bacteria
## 84   OTU_5500 0.001371357  100.00000   other_bacteria
## 85   OTU_5511 0.001293526   93.02326   other_bacteria
## 86   OTU_5519 0.001240782   48.83721   other_bacteria
## 87   OTU_5523 0.001213123  100.00000   other_bacteria
## 88   OTU_5564 0.001316682   93.02326   other_bacteria
## 89   OTU_5586 0.001207334   95.34884   other_bacteria
## 90   OTU_5607 0.001467197   93.02326   other_bacteria
## 91   OTU_5624 0.001290953   95.34884   other_bacteria
## 92   OTU_5633 0.002238424   95.34884   other_bacteria
## 93   OTU_5635 0.002038381  100.00000   other_bacteria
## 94   OTU_5651 0.001368140   93.02326   other_bacteria
## 95   OTU_5653 0.001828047   93.02326   other_bacteria
## 96   OTU_5662 0.001409950   90.69767   other_bacteria
## 97   OTU_5665 0.001334693   93.02326   other_bacteria
## 98   OTU_5667 0.001355276   93.02326   other_bacteria
## 99   OTU_5691 0.001470413   93.02326   other_bacteria
## 100  OTU_5727 0.002715698   93.02326   other_bacteria
## 101  OTU_5741 0.001373929  100.00000   other_bacteria
## 102  OTU_5742 0.001326974   93.02326   other_bacteria
## 103  OTU_5752 0.001840911   90.69767  Prochlorococcus
## 104  OTU_5772 0.001898801  100.00000   other_bacteria
## 105  OTU_5786 0.001188681   83.72093    Synechococcus
## 106  OTU_5800 0.001009864   93.02326   other_bacteria
## 107  OTU_5804 0.001425387   93.02326   other_bacteria
## 108  OTU_5815 0.001025301  100.00000   other_bacteria
## 109  OTU_5817 0.001042025  100.00000   other_bacteria
## 110  OTU_5821 0.001091553   93.02326   other_bacteria
## 111  OTU_5823 0.001253003   93.02326   other_bacteria
## 112  OTU_5879 0.001743784   93.02326   other_bacteria
## 113  OTU_5880 0.001172600   90.69767   other_bacteria
## 114  OTU_5911 0.001307034  100.00000   other_bacteria
## 115  OTU_5925 0.001328260   86.04651   other_bacteria
## 116  OTU_5942 0.001259435  100.00000   other_bacteria
## 117  OTU_5961 0.001958621  100.00000   other_bacteria
## 118  OTU_5968 0.001397086  100.00000   other_bacteria
## 119  OTU_5976 0.001164881   93.02326   other_bacteria
## 120  OTU_5978 0.001993355  100.00000   other_bacteria
## 121  OTU_5982 0.001111493   93.02326   other_bacteria
## 122  OTU_5985 0.001362351  100.00000   other_bacteria
## 123  OTU_5988 0.001011150   93.02326   other_bacteria
## 124  OTU_6047 0.001099915  100.00000   other_bacteria
## 125  OTU_6067 0.001894942  100.00000   other_bacteria
## 126  OTU_6083 0.003165955   86.04651    Synechococcus
## 127  OTU_6102 0.001303175   93.02326   other_bacteria
## 128  OTU_6155 0.001512866  100.00000   other_bacteria
## 129  OTU_6164 0.002118784   93.02326   other_bacteria
## 130  OTU_6193 0.001176459  100.00000   other_bacteria
## 131   OTU_635 0.003025732   93.02326   other_bacteria
## 132   OTU_709 0.003061110   93.02326   other_bacteria
## 133  OTU_7407 0.001341125   90.69767   other_bacteria
## 134  OTU_7455 0.001168097  100.00000   other_bacteria
## 135   OTU_759 0.001018226   86.04651    Synechococcus
## 136   OTU_761 0.008529169   93.02326  Prochlorococcus
## 137   OTU_790 0.001005361  100.00000   other_bacteria
## 138  OTU_7987 0.001436966  100.00000   other_bacteria
## 139  OTU_8011 0.003102276   83.72093   other_bacteria
## 140  OTU_8146 0.001164238  100.00000   other_bacteria
## 141  OTU_8153 0.001957335   93.02326   other_bacteria
## 142  OTU_8163 0.002039668  100.00000   other_bacteria
## 143  OTU_8367 0.001419598  100.00000   other_bacteria
## 144  OTU_8377 0.001168741  100.00000   other_bacteria
## 145  OTU_8436 0.001075473  100.00000   other_bacteria
## 146  OTU_8458 0.001327617  100.00000   other_bacteria
## 147  OTU_9255 0.001645371   86.04651    Synechococcus
## 148  OTU_9267 0.001112780   83.72093    Synechococcus
## 149   OTU_934 0.001139795  100.00000   other_bacteria
## [1] 149   5

Proportion of regionally abundant OTUs (%):

## [1] 0.7539341

Cosmopolitan OTUs (relative abundance over 0.1% and occurence in more than 80% of samples):

otu_tb16_ss1154_cosmop_sorted_prov<-cbind(otu_names=row.names(otu_tb16_ss36155_cosmop_sorted),otu_tb16_ss36155_cosmop_sorted)
##     otu_names mean_rabund perc_occur          class_B
## 1    OTU_1329 0.002241640   93.02326   other_bacteria
## 2    OTU_1676 0.002044814   88.37209  Prochlorococcus
## 3    OTU_1738 0.001929676   93.02326   other_bacteria
## 4    OTU_1754 0.011149026   93.02326  Prochlorococcus
## 5    OTU_1942 0.006453480   93.02326  Prochlorococcus
## 6     OTU_223 0.001027231   93.02326   other_bacteria
## 7      OTU_23 0.001443398   93.02326   other_bacteria
## 8    OTU_2428 0.007645377   93.02326   other_bacteria
## 9    OTU_2466 0.006429681   93.02326  Prochlorococcus
## 10   OTU_2789 0.001014366  100.00000   other_bacteria
## 11  OTU_27900 0.002379934  100.00000 Prymnesiophyceae
## 12  OTU_27901 0.003573117   81.39535 Prymnesiophyceae
## 13  OTU_27934 0.001615139   95.34884 Prymnesiophyceae
## 14  OTU_27938 0.001353989   93.02326    Pelagophyceae
## 15   OTU_2866 0.003430321   90.69767   other_bacteria
## 16   OTU_2877 0.002066683   95.34884   other_bacteria
## 17   OTU_2893 0.008587059   93.02326   other_bacteria
## 18     OTU_29 0.002464840  100.00000   other_bacteria
## 19   OTU_2980 0.001006648  100.00000   other_bacteria
## 20   OTU_3004 0.002071829   93.02326   other_bacteria
## 21   OTU_3023 0.001958621   93.02326   other_bacteria
## 22   OTU_3162 0.002286023   95.34884   other_bacteria
## 23   OTU_3172 0.002888082   88.37209   other_bacteria
## 24   OTU_3173 0.001013080   83.72093   other_bacteria
## 25   OTU_3187 0.003630364  100.00000   other_bacteria
## 26   OTU_3197 0.001815825   81.39535   other_bacteria
## 27   OTU_3198 0.001436966   93.02326   other_bacteria
## 28   OTU_3207 0.001030447   90.69767   other_bacteria
## 29   OTU_3214 0.001204761   93.02326   other_bacteria
## 30   OTU_3240 0.001783021   93.02326   other_bacteria
## 31   OTU_3271 0.005586413   90.69767  Prochlorococcus
## 32   OTU_3308 0.002082121   97.67442    Synechococcus
## 33   OTU_3311 0.002843056   90.69767  Prochlorococcus
## 34   OTU_3326 0.001315396   90.69767   other_bacteria
## 35    OTU_333 0.001588123  100.00000   other_bacteria
## 36   OTU_3347 0.001297386   97.67442   other_bacteria
## 37   OTU_3680 0.001653732   93.02326   other_bacteria
## 38   OTU_3731 0.001496142  100.00000   other_bacteria
## 39   OTU_3736 0.007181611   93.02326  Prochlorococcus
## 40   OTU_3789 0.001064538  100.00000   other_bacteria
## 41   OTU_3803 0.001385507  100.00000   other_bacteria
## 42   OTU_3810 0.001757292   93.02326   other_bacteria
## 43   OTU_4036 0.001322471   97.67442   other_bacteria
## 44   OTU_4048 0.002347129  100.00000   other_bacteria
## 45   OTU_4083 0.001072257   95.34884   other_bacteria
## 46   OTU_4366 0.001663381  100.00000   other_bacteria
## 47   OTU_4560 0.001658878  100.00000   other_bacteria
## 48   OTU_4664 0.001741211  100.00000   other_bacteria
## 49   OTU_4926 0.001751503   93.02326   other_bacteria
## 50   OTU_4991 0.001551460   95.34884   other_bacteria
## 51    OTU_520 0.001058749   93.02326   other_bacteria
## 52   OTU_5211 0.001081262   95.34884   other_bacteria
## 53   OTU_5222 0.001090910   93.02326   other_bacteria
## 54   OTU_5266 0.001177102   93.02326   other_bacteria
## 55   OTU_5268 0.001193826  100.00000   other_bacteria
## 56   OTU_5288 0.001213123   90.69767  Prochlorococcus
## 57   OTU_5301 0.001794599   95.34884   other_bacteria
## 58   OTU_5327 0.001590696   93.02326   other_bacteria
## 59   OTU_5330 0.004964414   93.02326  Prochlorococcus
## 60   OTU_5355 0.001160379  100.00000   other_bacteria
## 61   OTU_5398 0.005423033   90.69767  Prochlorococcus
## 62   OTU_5403 0.002407593   90.69767   other_bacteria
## 63   OTU_5414 0.001204118  100.00000   other_bacteria
## 64    OTU_542 0.001498072   93.02326   other_bacteria
## 65   OTU_5428 0.001070970  100.00000   other_bacteria
## 66   OTU_5432 0.002228776   93.02326   other_bacteria
## 67   OTU_5441 0.002136152   93.02326   other_bacteria
## 68   OTU_5445 0.001281948  100.00000   other_bacteria
## 69   OTU_5447 0.002478991  100.00000   other_bacteria
## 70   OTU_5449 0.001256219   93.02326   other_bacteria
## 71   OTU_5491 0.001247857  100.00000   other_bacteria
## 72   OTU_5493 0.001173886   95.34884   other_bacteria
## 73   OTU_5499 0.001057463   93.02326   other_bacteria
## 74   OTU_5500 0.001371357  100.00000   other_bacteria
## 75   OTU_5511 0.001293526   93.02326   other_bacteria
## 76   OTU_5523 0.001213123  100.00000   other_bacteria
## 77   OTU_5564 0.001316682   93.02326   other_bacteria
## 78   OTU_5586 0.001207334   95.34884   other_bacteria
## 79   OTU_5607 0.001467197   93.02326   other_bacteria
## 80   OTU_5624 0.001290953   95.34884   other_bacteria
## 81   OTU_5633 0.002238424   95.34884   other_bacteria
## 82   OTU_5635 0.002038381  100.00000   other_bacteria
## 83   OTU_5651 0.001368140   93.02326   other_bacteria
## 84   OTU_5653 0.001828047   93.02326   other_bacteria
## 85   OTU_5662 0.001409950   90.69767   other_bacteria
## 86   OTU_5665 0.001334693   93.02326   other_bacteria
## 87   OTU_5667 0.001355276   93.02326   other_bacteria
## 88   OTU_5691 0.001470413   93.02326   other_bacteria
## 89   OTU_5727 0.002715698   93.02326   other_bacteria
## 90   OTU_5741 0.001373929  100.00000   other_bacteria
## 91   OTU_5742 0.001326974   93.02326   other_bacteria
## 92   OTU_5752 0.001840911   90.69767  Prochlorococcus
## 93   OTU_5772 0.001898801  100.00000   other_bacteria
## 94   OTU_5786 0.001188681   83.72093    Synechococcus
## 95   OTU_5800 0.001009864   93.02326   other_bacteria
## 96   OTU_5804 0.001425387   93.02326   other_bacteria
## 97   OTU_5815 0.001025301  100.00000   other_bacteria
## 98   OTU_5817 0.001042025  100.00000   other_bacteria
## 99   OTU_5821 0.001091553   93.02326   other_bacteria
## 100  OTU_5823 0.001253003   93.02326   other_bacteria
## 101  OTU_5879 0.001743784   93.02326   other_bacteria
## 102  OTU_5880 0.001172600   90.69767   other_bacteria
## 103  OTU_5911 0.001307034  100.00000   other_bacteria
## 104  OTU_5925 0.001328260   86.04651   other_bacteria
## 105  OTU_5942 0.001259435  100.00000   other_bacteria
## 106  OTU_5961 0.001958621  100.00000   other_bacteria
## 107  OTU_5968 0.001397086  100.00000   other_bacteria
## 108  OTU_5976 0.001164881   93.02326   other_bacteria
## 109  OTU_5978 0.001993355  100.00000   other_bacteria
## 110  OTU_5982 0.001111493   93.02326   other_bacteria
## 111  OTU_5985 0.001362351  100.00000   other_bacteria
## 112  OTU_5988 0.001011150   93.02326   other_bacteria
## 113  OTU_6047 0.001099915  100.00000   other_bacteria
## 114  OTU_6067 0.001894942  100.00000   other_bacteria
## 115  OTU_6083 0.003165955   86.04651    Synechococcus
## 116  OTU_6102 0.001303175   93.02326   other_bacteria
## 117  OTU_6155 0.001512866  100.00000   other_bacteria
## 118  OTU_6164 0.002118784   93.02326   other_bacteria
## 119  OTU_6193 0.001176459  100.00000   other_bacteria
## 120   OTU_635 0.003025732   93.02326   other_bacteria
## 121   OTU_709 0.003061110   93.02326   other_bacteria
## 122  OTU_7407 0.001341125   90.69767   other_bacteria
## 123  OTU_7455 0.001168097  100.00000   other_bacteria
## 124   OTU_759 0.001018226   86.04651    Synechococcus
## 125   OTU_761 0.008529169   93.02326  Prochlorococcus
## 126   OTU_790 0.001005361  100.00000   other_bacteria
## 127  OTU_7987 0.001436966  100.00000   other_bacteria
## 128  OTU_8011 0.003102276   83.72093   other_bacteria
## 129  OTU_8146 0.001164238  100.00000   other_bacteria
## 130  OTU_8153 0.001957335   93.02326   other_bacteria
## 131  OTU_8163 0.002039668  100.00000   other_bacteria
## 132  OTU_8367 0.001419598  100.00000   other_bacteria
## 133  OTU_8377 0.001168741  100.00000   other_bacteria
## 134  OTU_8436 0.001075473  100.00000   other_bacteria
## 135  OTU_8458 0.001327617  100.00000   other_bacteria
## 136  OTU_9255 0.001645371   86.04651    Synechococcus
## 137  OTU_9267 0.001112780   83.72093    Synechococcus
## 138   OTU_934 0.001139795  100.00000   other_bacteria
## [1] 138   5

Number and proportion (%) of cosmopolitan OTUs:

## [1] 138
## [1] 0.6982746

Number and proportion (%) of rare OTUs:

## [1] 13963
## [1] 70.65223

2.6) Taxonomic composition analysis

2.6.1) Normalized data

2.6.1.1) Absolute values

Let’s add the taxonomic classification by merging “tb16_tax_occur_ss36155_no_cero” with “tb16_tax”:

tb16_bacteria <- tb16_ss36155_tax_sorted[which(tb16_ss36155_tax_sorted$class_A == "other_bacteria" | tb16_ss36155_tax_sorted$class_A == "Cyanobacteria" ),]

tb16_protists <- tb16_ss36155_tax_sorted[which(tb16_ss36155_tax_sorted$class_A != "other_bacteria" &  tb16_ss36155_tax_sorted$class_A != "Cyanobacteria"),]

dim(tb16_protists)
## [1] 318  47
dim(tb16_bacteria)
## [1] 19445    47
#tb16_protists[1:5,43:47]
#tb16_bacteria[1:5,43:47]
#create a table per group and count in how many samples they occur. 
Dinophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Dinophyceae"),]
Dinophyceae_tb[1:5,1:5]
##           MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_27905      0      0      0      0                  0
## OTU_27950      0      0      0      0                  0
## OTU_28007      1      0      0      0                  0
## OTU_28206      0      0      0      0                  0
## OTU_28215      0      0      0      0                  0
Dinophyceae_tb_occur <- Dinophyceae_tb[,1:43]
Dinophyceae_tb_occur[1:5,1:5]
##           MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_27905      0      0      0      0                  0
## OTU_27950      0      0      0      0                  0
## OTU_28007      1      0      0      0                  0
## OTU_28206      0      0      0      0                  0
## OTU_28215      0      0      0      0                  0
dim(Dinophyceae_tb_occur)
## [1]  5 43
length(Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0])
## [1] 7
#Dinophyceae_tb_samples <- Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0]
#length(Dinophyceae_tb_samples[which(colSums(Dinophyceae_tb_occur) != 0)])

Prasinophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "other_Prasinophyceae"),]
Prasinophyceae_tb_occur <- Prasinophyceae_tb[,1:43]
length(Prasinophyceae_tb_occur[,colSums(Prasinophyceae_tb_occur) > 0])
## [1] 39
Chrysophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Chrysophyceae"),]
Chrysophyceae_tb_occur <- Chrysophyceae_tb[,1:43]
length(Chrysophyceae_tb_occur[,colSums(Chrysophyceae_tb_occur) > 0])
## [1] 0
Pelagophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Pelagophyceae"),]
Pelagophyceae_tb_occur <- Pelagophyceae_tb[,1:43]
length(Pelagophyceae_tb_occur[,colSums(Pelagophyceae_tb_occur) > 0])
## [1] 43
Dictyochophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Dictyochophyceae"),]
Dictyochophyceae_tb_occur <- Dictyochophyceae_tb[,1:43]
length(Dictyochophyceae_tb_occur[,colSums(Dictyochophyceae_tb_occur) > 0])
## [1] 42
Cryptomonadales_tb <- tb16_protists[which(tb16_protists$class_A == "Cryptophyceae"),]
Cryptomonadales_tb_occur <- Cryptomonadales_tb[,1:43]
length(Cryptomonadales_tb_occur[,colSums(Cryptomonadales_tb_occur) > 0])
## [1] 29
Bacillariophyta_tb <- tb16_protists[which(tb16_protists$class_A == "Bacillariophyceae"),]
Bacillariophyta_tb_occur <- Bacillariophyta_tb[,1:43]
length(Bacillariophyta_tb_occur[,colSums(Bacillariophyta_tb_occur) > 0])
## [1] 35
Chlorarachniophyta_tb <- tb16_protists[which(tb16_protists$class_A == "Chlorarachniophyceae"),]
Chlorarachniophyta_tb_occur <- Chlorarachniophyta_tb[,1:43]
length(Chlorarachniophyta_tb_occur[,colSums(Chlorarachniophyta_tb_occur) > 0])
## [1] 5
Bolidophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Bolidophyceae"),]
Bolidophyceae_tb_occur <- Bolidophyceae_tb[,1:43]
length(Bolidophyceae_tb_occur[,colSums(Bolidophyceae_tb_occur) > 0])
## [1] 29
Pinguiochysidales_tb <- tb16_protists[which(tb16_protists$class_A == "Pinguiophyceae"),]
Pinguiochysidales_tb_occur <- Pinguiochysidales_tb[,1:43]
length(Pinguiochysidales_tb_occur[,colSums(Pinguiochysidales_tb_occur) > 0])
## [1] 2
Prymnesiophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Prymnesiophyceae"),]
Prymnesiophyceae_tb_occur <- Prymnesiophyceae_tb[,1:43]
length(Prymnesiophyceae_tb_occur[,colSums(Prymnesiophyceae_tb_occur) > 0])
## [1] 43
Mamiellophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Mamiellophyceae"),]
Mamiellophyceae_tb_occur <- Mamiellophyceae_tb[,1:43]
length(Mamiellophyceae_tb_occur[,colSums(Mamiellophyceae_tb_occur) > 0])
## [1] 30
Eustigmatales_tb <- tb16_protists[which(tb16_protists$class_A == "Eustigmatophyceae"),]
Eustigmatales_tb_occur <- Eustigmatales_tb[,1:43]
length(Eustigmatales_tb_occur[,colSums(Eustigmatales_tb_occur) > 0])
## [1] 7
Chlorophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Chlorophyceae"),]
Chlorophyceae_tb_occur <- Chlorophyceae_tb[,1:43]
length(Chlorophyceae_tb_occur[,colSums(Chlorophyceae_tb_occur) > 0])
## [1] 1
Ulvophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Ulvophyceae"),]
Ulvophyceae_tb_occur <- Ulvophyceae_tb[,1:43]
length(Ulvophyceae_tb_occur[,colSums(Ulvophyceae_tb_occur) > 0])
## [1] 0
Raphydophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Raphydophyceae"),]
Raphydophyceae_tb_occur <- Raphydophyceae_tb[,1:43]
length(Raphydophyceae_tb_occur[,colSums(Raphydophyceae_tb_occur) > 0])
## [1] 0
Trebouxiophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Trebouxiophyceae"),]
Trebouxiophyceae_tb_occur <- Trebouxiophyceae_tb[,1:43]
length(Trebouxiophyceae_tb_occur[,colSums(Trebouxiophyceae_tb_occur) > 0])
## [1] 1
Phaeophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Phaeophyceae"),]
Phaeophyceae_tb_occur <- Phaeophyceae_tb[,1:43]
length(Phaeophyceae_tb_occur[,colSums(Phaeophyceae_tb_occur) > 0])
## [1] 2
Phaeothamniophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Phaeothamniophyceae"),]
Phaeothamniophyceae_tb_occur <- Phaeothamniophyceae_tb[,1:43]
length(Phaeothamniophyceae_tb_occur[,colSums(Phaeothamniophyceae_tb_occur) > 0])
## [1] 0
Xanthophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Xanthophyceae"),]
Xanthophyceae_tb_occur <- Xanthophyceae_tb[,1:43]
length(Xanthophyceae_tb_occur[,colSums(Xanthophyceae_tb_occur) > 0])
## [1] 0
Chlorodendrophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Chlorodendrophyceae"),]
Chlorodendrophyceae_tb_occur <- Chlorodendrophyceae_tb[,1:43]
length(Chlorodendrophyceae_tb_occur[,colSums(Chlorodendrophyceae_tb_occur) > 0])
## [1] 1
IncertaeSedis_Archaeplastida_tb <- tb16_protists[which(tb16_protists$class_A == "IncertaeSedis_Archaeplastida"),]
IncertaeSedis_Archaeplastida_tb_occur <- IncertaeSedis_Archaeplastida_tb[,1:43]
length(IncertaeSedis_Archaeplastida_tb_occur[,colSums(IncertaeSedis_Archaeplastida_tb_occur) > 0])
## [1] 0
Nephroselmidophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Nephroselmidophyceae"),]
Nephroselmidophyceae_tb_occur <- Nephroselmidophyceae_tb[,1:43]
length(Nephroselmidophyceae_tb_occur[,colSums(Nephroselmidophyceae_tb_occur) > 0])
## [1] 0
Pavlovophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Pavlovophyceae"),]
Pavlovophyceae_tb_occur <- Pavlovophyceae_tb[,1:43]
length(Pavlovophyceae_tb_occur[,colSums(Pavlovophyceae_tb_occur) > 0])
## [1] 2
Rhodophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Rhodophyceae"),]
Rhodophyceae_tb_occur <- Rhodophyceae_tb[,1:43]
length(Rhodophyceae_tb_occur[,colSums(Rhodophyceae_tb_occur) > 0])
## [1] 0
Rappemonads_tb <- tb16_protists[which(tb16_protists$class_A == "Rappemonads"),]
Rappemonads_tb_occur <- Rappemonads_tb[,1:43]
length(Rappemonads_tb_occur[,colSums(Rappemonads_tb_occur) > 0])
## [1] 25
MOCH_1_tb <- tb16_protists[which(tb16_protists$class_A == "MOCH-1"),]
MOCH_1_tb_occur <- MOCH_1_tb[,1:43]
length(MOCH_1_tb_occur[,colSums(MOCH_1_tb_occur) > 0])
## [1] 0
MOCH_2_tb <- tb16_protists[which(tb16_protists$class_A == "MOCH-2"),]
MOCH_2_tb_occur <- MOCH_2_tb[,1:43]
length(MOCH_2_tb_occur[,colSums(MOCH_2_tb_occur) > 0])
## [1] 0
MOCH_5_tb <- tb16_protists[which(tb16_protists$class_A == "MOCH-5"),]
MOCH_5_tb_occur <- MOCH_5_tb[,1:43]
length(MOCH_5_tb_occur[,colSums(MOCH_5_tb_occur) > 0])
## [1] 0
Prasinophyceae_clade_VII_tb <- tb16_protists[which(tb16_protists$class_A == "Prasinophyceae_clade-VII"),]
Prasinophyceae_clade_VII_tb_occur <- Prasinophyceae_clade_VII_tb[,1:43]
length(Prasinophyceae_clade_VII_tb_occur[,colSums(Prasinophyceae_clade_VII_tb_occur) > 0])
## [1] 0
Prasinophyceae_clade_IX_tb <- tb16_protists[which(tb16_protists$class_A == "Prasinophyceae_clade-IX"),]
Prasinophyceae_clade_IX_tb_occur <- Prasinophyceae_clade_IX_tb[,1:43]
length(Prasinophyceae_clade_IX_tb_occur[,colSums(Prasinophyceae_clade_IX_tb_occur) > 0])
## [1] 0
Pyramimonadaceae_tb <- tb16_protists[which(tb16_protists$class_A == "Pyramimonadaceae"),]
Pyramimonadaceae_tb_occur <- Pyramimonadaceae_tb[,1:43]
length(Pyramimonadaceae_tb_occur[,colSums(Pyramimonadaceae_tb_occur) > 0])
## [1] 6
other_plastids_tb <- tb16_protists[which(tb16_protists$class_A == "other_plastids"),]
other_plastids_tb_occur <- other_plastids_tb[,1:43]
length(other_plastids_tb_occur[,colSums(other_plastids_tb_occur) > 0])
## [1] 21
#create a table per group and count in how many samples they occur. 
heterotrophic_bacteria_tb <- tb16_bacteria[which(tb16_bacteria$class_A == "other_bacteria"),]
heterotrophic_bacteria_tb_occur <- heterotrophic_bacteria_tb[,1:43]
length(heterotrophic_bacteria_tb_occur[,colSums(heterotrophic_bacteria_tb_occur) > 0])

Cyanobacteria_tb <- tb16_bacteria[which(tb16_bacteria$class_A == "Cyanobacteria"),]
Cyanobacteria_tb_occur <- Cyanobacteria_tb[,1:43]
length(Cyanobacteria_tb_occur[,colSums(Cyanobacteria_tb_occur) > 0])
##                      reads_per_class OTUs_per_class
## Bacillariophyceae                725            136
## Bolidophyceae                    158              5
## Chlorarachniophyceae              12              4
## Chlorodendrophyceae                1              1
## Chlorophyceae                      1              1
## Cryptophyceae                   1801             26
## Dictyochophyceae                1219              7
## Dinophyceae                       14              5
## Eustigmatophyceae                 15              2
## Mamiellophyceae                15689             14
## Pavlovophyceae                     2              1
## Pelagophyceae                   5578             10
## Phaeophyceae                       2              2
## Pinguiophyceae                     2              1
## Prymnesiophyceae               25911             50
## Pyramimonadaceae                  79              5
## Rappemonads                      121              4
## Trebouxiophyceae                   1              1
## other_Prasinophyceae            1183             21
## other_plastids                    53             22
##                      reads_per_class OTUs_per_class samples_per_class
## Prymnesiophyceae               25911             50                43
## Mamiellophyceae                15689             14                30
## Pelagophyceae                   5578             10                43
## Cryptophyceae                   1801             26                29
## Dictyochophyceae                1219              7                42
## other_Prasinophyceae            1183             21                39
## Bacillariophyceae                725            136                35
## Bolidophyceae                    158              5                29
## Rappemonads                      121              4                25
## Pyramimonadaceae                  79              5                 6
## other_plastids                    53             22                21
## Eustigmatophyceae                 15              2                 7
## Dinophyceae                       14              5                 7
## Chlorarachniophyceae              12              4                 5
## Phaeophyceae                       2              2                 2
## Pavlovophyceae                     2              1                 2
## Pinguiophyceae                     2              1                 2
## Chlorodendrophyceae                1              1                 1
## Chlorophyceae                      1              1                 1
## Trebouxiophyceae                   1              1                 1
##                      reads_per_class OTUs_per_class samples_per_class
## Prymnesiophyceae               25911             50                43
## Mamiellophyceae                15689             14                30
## Pelagophyceae                   5578             10                43
## Cryptophyceae                   1801             26                29
## Dictyochophyceae                1219              7                42
## other_Prasinophyceae            1183             21                39
## Bacillariophyceae                725            136                35
## Bolidophyceae                    158              5                29
## Rappemonads                      121              4                25
## Pyramimonadaceae                  79              5                 6
## Eustigmatophyceae                 15              2                 7
## Dinophyceae                       14              5                 7
## Chlorarachniophyceae              12              4                 5
## Phaeophyceae                       2              2                 2
## Pavlovophyceae                     2              1                 2
## Pinguiophyceae                     2              1                 2
## Chlorodendrophyceae                1              1                 1
## Chlorophyceae                      1              1                 1
## Trebouxiophyceae                   1              1                 1
##                reads_per_class OTUs_per_class
## Cyanobacteria           171416            344
## other_bacteria         1330682          19101
## NA                          NA             NA
## NA.1                        NA             NA
## NA.2                        NA             NA
##                reads_per_class OTUs_per_class samples_per_class
## other_bacteria         1330682          19101                43
## Cyanobacteria           171416            344                43
cyano_tb <- tb16_bacteria[which(tb16_bacteria$class_A != "other_bacteria"),]

class_summary_reads_per_class_cyano<-aggregate(rowSums(cyano_tb[1:43]), list(cyano_tb$class_B), sum)
# count the different groups
class_summary_otus_per_class_cyano<-aggregate(rowSums(cyano_tb[1:43]), list(cyano_tb$class_B), length)

## READS PER CLASS ##
attach(class_summary_reads_per_class_cyano)
class_summary_reads_per_class_cyano_order<-class_summary_reads_per_class_cyano[order(-x),]
detach(class_summary_reads_per_class_cyano)
class_summary_reads_per_class_cyano_order
##               Group.1      x
## 1     Prochlorococcus 125651
## 2       Synechococcus  41990
## 3 other_cyanobacteria   3775
#fix column names
row.names(class_summary_reads_per_class_cyano_order)<-class_summary_reads_per_class_cyano_order[,1]
class_summary_reads_per_class_cyano_order<-class_summary_reads_per_class_cyano_order[c(-1)]
colnames(class_summary_reads_per_class_cyano_order)<-c("reads_per_class")


## OTUs PER CLASS ##
attach(class_summary_otus_per_class_cyano)
class_summary_otus_per_class_cyano_order<-class_summary_otus_per_class_cyano[order(-x),]
detach(class_summary_otus_per_class_cyano)
class_summary_otus_per_class_cyano_order
##               Group.1   x
## 3 other_cyanobacteria 143
## 2       Synechococcus 102
## 1     Prochlorococcus  99
row.names(class_summary_otus_per_class_cyano_order)<-class_summary_otus_per_class_cyano_order[,1]
class_summary_otus_per_class_cyano_order<-class_summary_otus_per_class_cyano_order[c(-1)]
colnames(class_summary_otus_per_class_cyano_order)<-c("OTUs_per_class")

# create_table_merging_#OTUs_#reads
cyano_OTUs_reads <- merge(class_summary_reads_per_class_cyano_order, class_summary_otus_per_class_cyano_order, by="row.names")

row.names(cyano_OTUs_reads)<-cyano_OTUs_reads[,1]
cyano_OTUs_reads<-cyano_OTUs_reads[,-1]
colnames(cyano_OTUs_reads)<-c("reads_per_class","OTUs_per_class")
cyano_OTUs_reads[1:3,1:2]
##                     reads_per_class OTUs_per_class
## Prochlorococcus              125651             99
## Synechococcus                 41990            102
## other_cyanobacteria            3775            143
cyano_OTUs_reads<-cyano_OTUs_reads[order(cyano_OTUs_reads$reads_per_class, cyano_OTUs_reads$OTUs_per_class, decreasing = T), c(1,2)]
cyano_OTUs_reads
##                     reads_per_class OTUs_per_class
## Prochlorococcus              125651             99
## Synechococcus                 41990            102
## other_cyanobacteria            3775            143
#compute relative values
cyano_OTUs_reads_rel_abund <- cyano_OTUs_reads

cyano_OTUs_reads_rel_abund$reads_per_class<-(cyano_OTUs_reads_rel_abund$reads_per_class*100)/colSums(cyano_OTUs_reads)[1]
cyano_OTUs_reads_rel_abund$OTUs_per_class<-(cyano_OTUs_reads_rel_abund$OTUs_per_class*100)/colSums(cyano_OTUs_reads)[2]

colSums(cyano_OTUs_reads_rel_abund)
## reads_per_class  OTUs_per_class 
##             100             100
rownames(cyano_OTUs_reads_rel_abund) = c("Prochlorococcus", "Synechococcus", "Other cyanobacteria")
cyano_OTUs_reads_rel_abund
##                     reads_per_class OTUs_per_class
## Prochlorococcus           73.301792       28.77907
## Synechococcus             24.495963       29.65116
## Other cyanobacteria        2.202245       41.56977
cyano_OTUs_reads_rel_abund["cyano_group"]<-NA
cyano_OTUs_reads_rel_abund$cyano_group<-c("Prochlorococcus", "Synechococcus", "Other cyanobacteria")

cyano_OTUs_reads_rel_abund2<-read.table("input/cyano_histograms_data.txt", head=TRUE)
cyano_OTUs_reads_rel_abund2
##                 group     value       data
## 1     Prochlorococcus 73.479888 % of reads
## 2     Prochlorococcus 28.323700  % of OTUs
## 3       Synechococcus 24.288471 % of reads
## 4       Synechococcus 30.346820  % of OTUs
## 5 other_cyanobacteria  2.231642 % of reads
## 6 other_cyanobacteria 41.329480  % of OTUs
ggplot(cyano_OTUs_reads_rel_abund2, aes(x=group, y=value, fill=data)) + 
  geom_bar(position="dodge", stat="identity") +
  scale_y_continuous(limits=c(-1,100), breaks=c(25,50,75,100)) +
  scale_fill_manual(values=c("#000000", "cadetblue3")) +
  labs(x="", y="% \n") + 
  theme(axis.text=element_text(size=14), axis.title=element_text(size=16)) + 
  theme(legend.title = element_blank(), legend.position=c(0.85,0.85), legend.text=element_text(size=14)) +
  theme(axis.title.y = element_text(size = rel(1.5), angle = 0, vjust=0.97))

2.6.1.2) Relative values

##   reads_per_class    OTUs_per_class samples_per_class 
##          100.0000          100.0000          911.6279
##                      reads_per_class OTUs_per_class samples_per_class
## Prymnesiophyceae        1.157103e+01        7.81250        100.000000
## Mamiellophyceae         7.006207e+00        2.18750         69.767442
## Pelagophyceae           2.490957e+00        1.56250        100.000000
## Cryptophyceae           8.042692e-01        4.06250         67.441860
## Dictyochophyceae        5.443665e-01        1.09375         97.674419
## other_Prasinophyceae    5.282901e-01        3.28125         90.697674
## Bacillariophyceae       3.237619e-01       21.25000         81.395349
## Bolidophyceae           7.055776e-02        0.78125         67.441860
## Rappemonads             5.403474e-02        0.62500         58.139535
## Pyramimonadaceae        3.527888e-02        0.78125         13.953488
## Eustigmatophyceae       6.698522e-03        0.31250         16.279070
## Dinophyceae             6.251954e-03        0.78125         16.279070
## Chlorarachniophyceae    5.358817e-03        0.62500         11.627907
## Phaeophyceae            8.931362e-04        0.31250          4.651163
## Pavlovophyceae          8.931362e-04        0.15625          4.651163
## Pinguiophyceae          8.931362e-04        0.15625          4.651163
## Chlorodendrophyceae     4.465681e-04        0.15625          2.325581
## Chlorophyceae           4.465681e-04        0.15625          2.325581
## Trebouxiophyceae        4.465681e-04        0.15625          2.325581
## Cyanobacteria           7.654892e+01       53.75000        100.000000



Reads per class vs. OTUs per class:



Reads per class vs. samples in which they occurr:

2.6.2) Non-rarefied data

Let’s add the taxonomic classification by merging “tb16_tax_occur_min36155_t” with “tb16_tax”:

Selection of phototrophs:

## [1]  0 47
## [1] 318  47
## [1] 19445    47
#create a table per group and count in how many samples they occur. 
Dinophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Dinophyceae"),]
Dinophyceae_tb[1:5,1:5]
##           TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_27905                  0                  0                  0
## OTU_27950                  0                  0                  0
## OTU_28007                  0                  0                  0
## OTU_28206                  0                  1                  0
## OTU_28215                  0                  0                  0
##           TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_27905                  0                  1
## OTU_27950                  0                  0
## OTU_28007                  0                  0
## OTU_28206                  0                  0
## OTU_28215                  0                  0
Dinophyceae_tb_occur <- Dinophyceae_tb[,1:43]
Dinophyceae_tb_occur[1:5,1:5]
##           TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_27905                  0                  0                  0
## OTU_27950                  0                  0                  0
## OTU_28007                  0                  0                  0
## OTU_28206                  0                  1                  0
## OTU_28215                  0                  0                  0
##           TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_27905                  0                  1
## OTU_27950                  0                  0
## OTU_28007                  0                  0
## OTU_28206                  0                  0
## OTU_28215                  0                  0
dim(Dinophyceae_tb_occur)
## [1]  6 43
length(Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0])
## [1] 11
#Dinophyceae_tb_samples <- Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0]
#length(Dinophyceae_tb_samples[which(colSums(Dinophyceae_tb_occur) != 0)])

Prasinophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "other_Prasinophyceae"),]
Prasinophyceae_tb_occur <- Prasinophyceae_tb[,1:43]
length(Prasinophyceae_tb_occur[,colSums(Prasinophyceae_tb_occur) > 0])
## [1] 41
Chrysophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Chrysophyceae"),]
Chrysophyceae_tb_occur <- Chrysophyceae_tb[,1:43]
length(Chrysophyceae_tb_occur[,colSums(Chrysophyceae_tb_occur) > 0])
## [1] 0
Pelagophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Pelagophyceae"),]
Pelagophyceae_tb_occur <- Pelagophyceae_tb[,1:43]
length(Pelagophyceae_tb_occur[,colSums(Pelagophyceae_tb_occur) > 0])
## [1] 43
Dictyochophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Dictyochophyceae"),]
Dictyochophyceae_tb_occur <- Dictyochophyceae_tb[,1:43]
length(Dictyochophyceae_tb_occur[,colSums(Dictyochophyceae_tb_occur) > 0])
## [1] 43
Cryptomonadales_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Cryptophyceae"),]
Cryptomonadales_tb_occur <- Cryptomonadales_tb[,1:43]
length(Cryptomonadales_tb_occur[,colSums(Cryptomonadales_tb_occur) > 0])
## [1] 31
Bacillariophyta_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Bacillariophyceae"),]
Bacillariophyta_tb_occur <- Bacillariophyta_tb[,1:43]
length(Bacillariophyta_tb_occur[,colSums(Bacillariophyta_tb_occur) > 0])
## [1] 39
Chlorarachniophyta_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Chlorarachniophyceae"),]
Chlorarachniophyta_tb_occur <- Chlorarachniophyta_tb[,1:43]
length(Chlorarachniophyta_tb_occur[,colSums(Chlorarachniophyta_tb_occur) > 0])
## [1] 8
Bolidophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Bolidophyceae"),]
Bolidophyceae_tb_occur <- Bolidophyceae_tb[,1:43]
length(Bolidophyceae_tb_occur[,colSums(Bolidophyceae_tb_occur) > 0])
## [1] 34
Pinguiochysidales_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Pinguiophyceae"),]
Pinguiochysidales_tb_occur <- Pinguiochysidales_tb[,1:43]
length(Pinguiochysidales_tb_occur[,colSums(Pinguiochysidales_tb_occur) > 0])
## [1] 3
Prymnesiophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Prymnesiophyceae"),]
Prymnesiophyceae_tb_occur <- Prymnesiophyceae_tb[,1:43]
length(Prymnesiophyceae_tb_occur[,colSums(Prymnesiophyceae_tb_occur) > 0])
## [1] 43
Mamiellophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Mamiellophyceae"),]
Mamiellophyceae_tb_occur <- Mamiellophyceae_tb[,1:43]
length(Mamiellophyceae_tb_occur[,colSums(Mamiellophyceae_tb_occur) > 0])
## [1] 31
Eustigmatales_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Eustigmatophyceae"),]
Eustigmatales_tb_occur <- Eustigmatales_tb[,1:43]
length(Eustigmatales_tb_occur[,colSums(Eustigmatales_tb_occur) > 0])
## [1] 9
Chlorophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Chlorophyceae"),]
Chlorophyceae_tb_occur <- Chlorophyceae_tb[,1:43]
length(Chlorophyceae_tb_occur[,colSums(Chlorophyceae_tb_occur) > 0])
## [1] 2
Ulvophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Ulvophyceae"),]
Ulvophyceae_tb_occur <- Ulvophyceae_tb[,1:43]
length(Ulvophyceae_tb_occur[,colSums(Ulvophyceae_tb_occur) > 0])
## [1] 0
Raphydophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Raphydophyceae"),]
Raphydophyceae_tb_occur <- Raphydophyceae_tb[,1:43]
length(Raphydophyceae_tb_occur[,colSums(Raphydophyceae_tb_occur) > 0])
## [1] 0
Trebouxiophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Trebouxiophyceae"),]
Trebouxiophyceae_tb_occur <- Trebouxiophyceae_tb[,1:43]
length(Trebouxiophyceae_tb_occur[,colSums(Trebouxiophyceae_tb_occur) > 0])
## [1] 1
Phaeophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Phaeophyceae"),]
Phaeophyceae_tb_occur <- Phaeophyceae_tb[,1:43]
length(Phaeophyceae_tb_occur[,colSums(Phaeophyceae_tb_occur) > 0])
## [1] 2
Phaeothamniophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Phaeothamniophyceae"),]
Phaeothamniophyceae_tb_occur <- Phaeothamniophyceae_tb[,1:43]
length(Phaeothamniophyceae_tb_occur[,colSums(Phaeothamniophyceae_tb_occur) > 0])
## [1] 0
Xanthophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Xanthophyceae"),]
Xanthophyceae_tb_occur <- Xanthophyceae_tb[,1:43]
length(Xanthophyceae_tb_occur[,colSums(Xanthophyceae_tb_occur) > 0])
## [1] 0
Chlorodendrophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Chlorodendrophyceae"),]
Chlorodendrophyceae_tb_occur <- Chlorodendrophyceae_tb[,1:43]
length(Chlorodendrophyceae_tb_occur[,colSums(Chlorodendrophyceae_tb_occur) > 0])
## [1] 2
IncertaeSedis_Archaeplastida_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "IncertaeSedis_Archaeplastida"),]
IncertaeSedis_Archaeplastida_tb_occur <- IncertaeSedis_Archaeplastida_tb[,1:43]
length(IncertaeSedis_Archaeplastida_tb_occur[,colSums(IncertaeSedis_Archaeplastida_tb_occur) > 0])
## [1] 0
Nephroselmidophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Nephroselmidophyceae"),]
Nephroselmidophyceae_tb_occur <- Nephroselmidophyceae_tb[,1:43]
length(Nephroselmidophyceae_tb_occur[,colSums(Nephroselmidophyceae_tb_occur) > 0])
## [1] 0
Pavlovophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Pavlovophyceae"),]
Pavlovophyceae_tb_occur <- Pavlovophyceae_tb[,1:43]
length(Pavlovophyceae_tb_occur[,colSums(Pavlovophyceae_tb_occur) > 0])
## [1] 2
Rhodophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Rhodophyceae"),]
Rhodophyceae_tb_occur <- Rhodophyceae_tb[,1:43]
length(Rhodophyceae_tb_occur[,colSums(Rhodophyceae_tb_occur) > 0])
## [1] 0
Rappemonads_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Rappemonads"),]
Rappemonads_tb_occur <- Rappemonads_tb[,1:43]
length(Rappemonads_tb_occur[,colSums(Rappemonads_tb_occur) > 0])
## [1] 37
MOCH_1_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "MOCH-1"),]
MOCH_1_tb_occur <- MOCH_1_tb[,1:43]
length(MOCH_1_tb_occur[,colSums(MOCH_1_tb_occur) > 0])
## [1] 0
MOCH_2_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "MOCH-2"),]
MOCH_2_tb_occur <- MOCH_2_tb[,1:43]
length(MOCH_2_tb_occur[,colSums(MOCH_2_tb_occur) > 0])
## [1] 0
MOCH_5_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "MOCH-5"),]
MOCH_5_tb_occur <- MOCH_5_tb[,1:43]
length(MOCH_5_tb_occur[,colSums(MOCH_5_tb_occur) > 0])
## [1] 0
Prasinophyceae_clade_VII_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Prasinophyceae_clade-VII"),]
Prasinophyceae_clade_VII_tb_occur <- Prasinophyceae_clade_VII_tb[,1:43]
length(Prasinophyceae_clade_VII_tb_occur[,colSums(Prasinophyceae_clade_VII_tb_occur) > 0])
## [1] 0
Prasinophyceae_clade_IX_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Prasinophyceae_clade-IX"),]
Prasinophyceae_clade_IX_tb_occur <- Prasinophyceae_clade_IX_tb[,1:43]
length(Prasinophyceae_clade_IX_tb_occur[,colSums(Prasinophyceae_clade_IX_tb_occur) > 0])
## [1] 0
Pyramimonadaceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Pyramimonadaceae"),]
Pyramimonadaceae_tb_occur <- Pyramimonadaceae_tb[,1:43]
length(Pyramimonadaceae_tb_occur[,colSums(Pyramimonadaceae_tb_occur) > 0])
## [1] 11
other_plastids_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "other_plastids"),]
other_plastids_tb_occur <- other_plastids_tb[,1:43]
length(other_plastids_tb_occur[,colSums(other_plastids_tb_occur) > 0])
## [1] 29
## [1] 43
## [1] 43
##                      reads_per_class OTUs_per_class
## Bacillariophyceae               1777            168
## Bolidophyceae                    312              7
## Chlorarachniophyceae              33              5
## Chlorodendrophyceae                2              2
## Chlorophyceae                      2              2
##                      reads_per_class OTUs_per_class samples_per_class
## Prymnesiophyceae               55897             50                43
## Mamiellophyceae                32273             15                31
## Pelagophyceae                  11504             10                43
## Cryptophyceae                   3872             28                31
## Dictyochophyceae                2954              7                43
## other_Prasinophyceae            2694             26                41
## Bacillariophyceae               1777            168                39
## Bolidophyceae                    312              7                34
## Rappemonads                      249              4                37
## Pyramimonadaceae                 147              7                11
## other_plastids                    93             28                29
## Chlorarachniophyceae              33              5                 8
## Dinophyceae                       25              6                11
## Eustigmatophyceae                 25              2                 9
## Pinguiophyceae                     3              1                 3
## Chlorodendrophyceae                2              2                 2
## Chlorophyceae                      2              2                 2
## Phaeophyceae                       2              2                 2
## Pavlovophyceae                     2              1                 2
## Trebouxiophyceae                   1              1                 1
##                      reads_per_class OTUs_per_class samples_per_class
## Prymnesiophyceae               55897             50                43
## Mamiellophyceae                32273             15                31
## Pelagophyceae                  11504             10                43
## Cryptophyceae                   3872             28                31
## Dictyochophyceae                2954              7                43
## other_Prasinophyceae            2694             26                41
## Bacillariophyceae               1777            168                39
## Bolidophyceae                    312              7                34
## Rappemonads                      249              4                37
## Pyramimonadaceae                 147              7                11
## Chlorarachniophyceae              33              5                 8
## Dinophyceae                       25              6                11
## Eustigmatophyceae                 25              2                 9
## Pinguiophyceae                     3              1                 3
## Chlorodendrophyceae                2              2                 2
## Chlorophyceae                      2              2                 2
## Phaeophyceae                       2              2                 2
## Pavlovophyceae                     2              1                 2
## Trebouxiophyceae                   1              1                 1
##                reads_per_class OTUs_per_class
## Cyanobacteria           400768            364
## other_bacteria         3171424          23955
## NA                          NA             NA
## NA.1                        NA             NA
## NA.2                        NA             NA



OTUs per class vs. samples in which they occur - PROTISTS vs. CYANOBACTERIA [PLOT DESCRIPTION]

##   reads_per_class    OTUs_per_class samples_per_class 
##           100.000           100.000          1013.953
##                      reads_per_class OTUs_per_class samples_per_class
## Prymnesiophyceae        1.090584e+01      7.0621469        100.000000
## Mamiellophyceae         6.296655e+00      2.1186441         72.093023
## Pelagophyceae           2.244499e+00      1.4124294        100.000000
## Cryptophyceae           7.554503e-01      3.9548023         72.093023
## Dictyochophyceae        5.763430e-01      0.9887006        100.000000
## other_Prasinophyceae    5.256155e-01      3.6723164         95.348837
## Bacillariophyceae       3.467033e-01     23.7288136         90.697674
## Bolidophyceae           6.087306e-02      0.9887006         79.069767
## Rappemonads             4.858138e-02      0.5649718         86.046512
## Pyramimonadaceae        2.868058e-02      0.9887006         25.581395
## Chlorarachniophyceae    6.438497e-03      0.7062147         18.604651
## Dinophyceae             4.877649e-03      0.8474576         25.581395
## Eustigmatophyceae       4.877649e-03      0.2824859         20.930233
## Pinguiophyceae          5.853179e-04      0.1412429          6.976744
## Chlorodendrophyceae     3.902119e-04      0.2824859          4.651163
## Chlorophyceae           3.902119e-04      0.2824859          4.651163
## Phaeophyceae            3.902119e-04      0.2824859          4.651163
## Pavlovophyceae          3.902119e-04      0.1412429          4.651163
## Trebouxiophyceae        1.951060e-04      0.1412429          2.325581
## Cyanobacteria           7.819223e+01     51.4124294        100.000000



Reads per class vs. OTUs per class:



Reads per class vs. samples in which they occurr: